File size: 792 Bytes
105b369
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from typing import List, Iterator

from phi.document import Document
from phi.document.reader.s3.text import S3TextReader
from phi.knowledge.s3.base import S3KnowledgeBase


class S3TextKnowledgeBase(S3KnowledgeBase):
    formats: List[str] = [".doc", ".docx"]
    reader: S3TextReader = S3TextReader()

    @property
    def document_lists(self) -> Iterator[List[Document]]:
        """Iterate over text files in a s3 bucket and yield lists of documents.
        Each object yielded by the iterator is a list of documents.

        Returns:
            Iterator[List[Document]]: Iterator yielding list of documents
        """

        for s3_object in self.s3_objects:
            if s3_object.name.endswith(tuple(self.formats)):
                yield self.reader.read(s3_object=s3_object)