File size: 1,902 Bytes
105b369
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import json
from typing import List, Optional

from phi.document import Document
from phi.knowledge.website import WebsiteKnowledgeBase
from phi.tools import Toolkit
from phi.utils.log import logger


class WebsiteTools(Toolkit):
    def __init__(self, knowledge_base: Optional[WebsiteKnowledgeBase] = None):
        super().__init__(name="website_tools")
        self.knowledge_base: Optional[WebsiteKnowledgeBase] = knowledge_base

        if self.knowledge_base is not None and isinstance(self.knowledge_base, WebsiteKnowledgeBase):
            self.register(self.add_website_to_knowledge_base)
        else:
            self.register(self.read_url)

    def add_website_to_knowledge_base(self, url: str) -> str:
        """This function adds a websites content to the knowledge base.
        NOTE: The website must start with https:// and should be a valid website.

        USE THIS FUNCTION TO GET INFORMATION ABOUT PRODUCTS FROM THE INTERNET.

        :param url: The url of the website to add.
        :return: 'Success' if the website was added to the knowledge base.
        """
        if self.knowledge_base is None:
            return "Knowledge base not provided"

        logger.debug(f"Adding to knowledge base: {url}")
        self.knowledge_base.urls.append(url)
        logger.debug("Loading knowledge base.")
        self.knowledge_base.load(recreate=False)
        return "Success"

    def read_url(self, url: str) -> str:
        """This function reads a url and returns the content.

        :param url: The url of the website to read.
        :return: Relevant documents from the website.
        """
        from phi.document.reader.website import WebsiteReader

        website = WebsiteReader()

        logger.debug(f"Reading website: {url}")
        relevant_docs: List[Document] = website.read(url=url)
        return json.dumps([doc.to_dict() for doc in relevant_docs])