Spaces:
Runtime error
Runtime error
from typing import Dict, List, Optional, Literal | |
from phi.document.base import Document | |
from phi.document.reader.base import Reader | |
from phi.utils.log import logger | |
from firecrawl import FirecrawlApp | |
class FirecrawlReader(Reader):
    """Reader that fetches a web page through Firecrawl and wraps it in Documents.

    Only ``"scrape"`` mode (a single URL) is implemented; ``"crawl"`` mode
    raises ``NotImplementedError`` from :meth:`read`.
    """

    # API key handed to FirecrawlApp; None is passed through unchanged.
    api_key: Optional[str] = None
    # Optional scrape options forwarded to FirecrawlApp.scrape_url as ``params``.
    params: Optional[Dict] = None
    # Selects the behaviour of read(); only "scrape" is currently supported.
    mode: Literal["scrape", "crawl"] = "scrape"

    def scrape(self, url: str) -> List[Document]:
        """
        Scrapes a website and returns a list of documents.

        Args:
            url: The URL of the website to scrape

        Returns:
            A list of documents: the chunks produced by ``chunk_document``
            when ``self.chunk`` is truthy, otherwise a single document
            holding the whole page.
        """
        logger.debug(f"Scraping: {url}")

        app = FirecrawlApp(api_key=self.api_key)

        # Forward user-supplied options. The keyword is only passed when
        # params is set, so the call is unchanged for the default case.
        scrape_kwargs = {"params": self.params} if self.params else {}
        scraped_data = app.scrape_url(url, **scrape_kwargs)

        # Fall back to empty values so a response lacking these keys does
        # not hand None to Document.
        content = scraped_data.get("content") or ""
        metadata = scraped_data.get("metadata") or {}

        document = Document(name=url, id=url, meta_data=metadata, content=content)

        # chunk / chunk_document are not defined in this class; presumably
        # they are inherited from Reader.
        if self.chunk:
            return list(self.chunk_document(document))
        return [document]

    def read(self, url: str) -> List[Document]:
        """
        Reads a URL according to the configured ``mode``.

        Args:
            url: The URL of the website to scrape

        Returns:
            A list of documents

        Raises:
            NotImplementedError: If ``mode`` is anything other than "scrape".
        """
        if self.mode == "scrape":
            return self.scrape(url)
        raise NotImplementedError("Crawl mode is not implemented yet")