Spaces:
Runtime error
Runtime error
"""Loading logic for loading documents from an s3 directory.""" | |
from typing import List | |
from langchain.docstore.document import Document | |
from langchain.document_loaders.base import BaseLoader | |
from langchain.document_loaders.s3_file import S3FileLoader | |
class S3DirectoryLoader(BaseLoader):
    """Load the objects found under an S3 bucket prefix as documents.

    Every object returned by the bucket listing for ``prefix`` is handed to
    ``S3FileLoader``; the documents produced for each object are
    concatenated into a single list.
    """

    def __init__(self, bucket: str, prefix: str = ""):
        """Initialize with the bucket name and an optional key prefix.

        Args:
            bucket: Name of the S3 bucket to list.
            prefix: Key prefix passed to the bucket listing as ``Prefix``.
                Defaults to the empty string.
        """
        self.bucket = bucket
        self.prefix = prefix

    def load(self) -> List[Document]:
        """Load documents from every object matching the configured prefix.

        Returns:
            The documents produced by ``S3FileLoader`` for each listed
            object, in listing order.

        Raises:
            ValueError: If the ``boto3`` package is not installed. The
                original ``ImportError`` is attached as ``__cause__``.
        """
        try:
            # Imported lazily so boto3 is only required when S3 is used.
            import boto3
        except ImportError as exc:
            # ValueError is kept (rather than ImportError) so existing
            # callers that catch it keep working.
            raise ValueError(
                "Could not import boto3 python package. "
                "Please install it with `pip install boto3`."
            ) from exc

        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucket)
        docs: List[Document] = []
        for obj in bucket.objects.filter(Prefix=self.prefix):
            # Zero-byte keys ending in "/" are folder placeholders (e.g.
            # created by the S3 console); they hold no file content to load.
            if obj.size == 0 and obj.key.endswith("/"):
                continue
            loader = S3FileLoader(self.bucket, obj.key)
            docs.extend(loader.load())
        return docs