AmmarFahmy
adding all files
105b369
from typing import List, Iterator, Optional
from phi.document import Document
from phi.aws.resource.s3.bucket import S3Bucket
from phi.aws.resource.s3.object import S3Object
from phi.knowledge.base import AssistantKnowledge
class S3KnowledgeBase(AssistantKnowledge):
    """Knowledge base that selects its source objects from an S3 bucket.

    This class only resolves *which* S3 objects should be read (see
    ``s3_objects``). ``document_lists`` is not implemented here and always
    raises ``NotImplementedError``.
    """

    # Provide either bucket or bucket_name
    bucket: Optional[S3Bucket] = None
    bucket_name: Optional[str] = None

    # Provide either object or key
    key: Optional[str] = None
    object: Optional[S3Object] = None

    # Filter objects by prefix
    # Ignored if object or key is provided
    prefix: Optional[str] = None

    @property
    def document_lists(self) -> Iterator[List[Document]]:
        """Iterator over lists of documents; not implemented in this class.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    @property
    def s3_objects(self) -> List[S3Object]:
        """Return the S3 objects selected by the current configuration.

        The first matching rule wins:

        1. ``key`` is set: a single ``S3Object`` built from the resolved
           bucket's name and ``key``.
        2. ``object`` is set: that object, as given.
        3. ``prefix`` is set: ``bucket.get_objects(prefix=prefix)``.
        4. Otherwise: ``bucket.get_objects()``.

        Returns:
            List[S3Object]: The objects to read.

        Raises:
            ValueError: If neither ``bucket`` nor ``bucket_name`` is set, if
                both are set, or if both ``object`` and ``key`` are set.
        """
        if self.bucket is None and self.bucket_name is None:
            raise ValueError("No bucket or bucket_name provided")
        if self.bucket is not None and self.bucket_name is not None:
            raise ValueError("Provide either bucket or bucket_name")
        if self.object is not None and self.key is not None:
            raise ValueError("Provide either object or key")

        # Resolve the bucket into a local rather than assigning it to
        # ``self.bucket``. Storing it on the instance would leave both
        # ``bucket`` and ``bucket_name`` set, so the mutual-exclusion check
        # above would raise on every later access of this property.
        if self.bucket_name is not None:
            bucket = S3Bucket(name=self.bucket_name)
        else:
            bucket = self.bucket

        if self.key is not None:
            return [S3Object(bucket_name=bucket.name, name=self.key)]
        if self.object is not None:
            return [self.object]
        if self.prefix is not None:
            return list(bucket.get_objects(prefix=self.prefix))
        return list(bucket.get_objects())