|
"""Base reader class.""" |
|
from abc import abstractmethod |
|
from typing import Any, List |
|
|
|
from langchain.docstore.document import Document as LCDocument |
|
from application.parser.schema.base import Document |
|
|
|
|
|
class BaseReader: |
|
"""Utilities for loading data from a directory.""" |
|
|
|
@abstractmethod |
|
def load_data(self, *args: Any, **load_kwargs: Any) -> List[Document]: |
|
"""Load data from the input directory.""" |
|
|
|
def load_langchain_documents(self, **load_kwargs: Any) -> List[LCDocument]: |
|
"""Load data in LangChain document format.""" |
|
docs = self.load_data(**load_kwargs) |
|
return [d.to_langchain_format() for d in docs] |
|
|