File size: 1,472 Bytes
105b369
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from typing import Dict, List, Optional, Literal

from phi.document.base import Document
from phi.document.reader.base import Reader
from phi.utils.log import logger

from firecrawl import FirecrawlApp


class FirecrawlReader(Reader):
    """Reader that fetches a single URL through Firecrawl and wraps it in Documents.

    Only ``mode="scrape"`` is implemented; ``mode="crawl"`` raises
    ``NotImplementedError`` from :meth:`read`.
    """

    # API key handed to FirecrawlApp as-is (None is passed through unchanged).
    api_key: Optional[str] = None
    # Optional request options forwarded to FirecrawlApp.scrape_url.
    params: Optional[Dict] = None
    # Selects the behaviour of read(); only "scrape" is currently supported.
    mode: Literal["scrape", "crawl"] = "scrape"

    def scrape(self, url: str) -> List[Document]:
        """
        Scrapes a website and returns a list of documents.

        The URL is used as both the ``name`` and the ``id`` of the resulting
        document(s). When ``self.chunk`` is truthy the document is passed
        through ``self.chunk_document`` (both presumably provided by the
        ``Reader`` base class -- not visible in this file).

        Args:
            url: The URL of the website to scrape

        Returns:
            A list of documents
        """

        logger.debug(f"Scraping: {url}")

        app = FirecrawlApp(api_key=self.api_key)
        # Forward the configured options; previously `params` was declared on
        # the class but never reached the Firecrawl request.
        scraped_data = app.scrape_url(url, params=self.params)

        content = scraped_data.get("content")
        metadata = scraped_data.get("metadata")

        # A response without these keys would otherwise hand None to Document.
        if content is None:
            logger.warning(f"No content received for URL: {url}")
            content = ""
        if metadata is None:
            metadata = {}

        document = Document(name=url, id=url, meta_data=metadata, content=content)
        if self.chunk:
            return list(self.chunk_document(document))
        return [document]

    def read(self, url: str) -> List[Document]:
        """
        Reads a URL according to the configured ``mode``.

        Args:
            url: The URL of the website to scrape

        Returns:
            A list of documents

        Raises:
            NotImplementedError: If ``mode`` is anything other than "scrape".
        """

        if self.mode == "scrape":
            return self.scrape(url)
        else:
            raise NotImplementedError("Crawl mode is not implemented yet")