|
from metaphor_python import Metaphor |
|
from langchain.agents import tool |
|
from typing import List, Optional |
|
from langchain.tools.base import ToolException |
|
from langchain.tools import WikipediaQueryRun |
|
from langchain.utilities import WikipediaAPIWrapper |
|
from crawlbase import CrawlingAPI |
|
import streamlit as st |
|
|
|
|
|
# Metaphor search client; the API key is read from Streamlit's secrets store.
client = Metaphor(api_key=st.secrets["METAPHOR_API_KEY"])
|
|
|
@tool
def search(query: str, num_results: Optional[int] = None, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None):
    """Call search engine with a query.

    Args:
        query: Natural-language search query.
        num_results: Maximum number of results (service default when None).
        include_domains: Only return results from these domains.
        exclude_domains: Never return results from these domains.

    Returns:
        The Metaphor search response (result list plus the autoprompt string).

    Raises:
        ToolException: If the API call fails or the autoprompt refuses the query.
    """
    try:
        result = client.search(
            query,
            use_autoprompt=True,
            num_results=num_results,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
        )
        # The autoprompt sometimes refuses a query ("I'm sorry ..."); surface
        # that refusal as a tool error instead of returning an empty result.
        # Guard against autoprompt_string being None, which would make the
        # `in` test raise TypeError.
        if result.autoprompt_string and "I'm sorry" in result.autoprompt_string:
            raise Exception(result.autoprompt_string)
        return result
    except Exception as e:
        # str(e) is safe even when e.args is empty, where e.args[0] would
        # itself raise IndexError and mask the real failure.
        raise ToolException(str(e)) from e
|
|
|
|
|
@tool
def get_contents(ids: List[str]):
    """Get contents of a webpage. May return an empty content, it means you have to use another tool to get the content.

    The ids passed in should be a list of ids as fetched from `search`.

    Args:
        ids: Document ids returned by the `search` tool.

    Returns:
        The Metaphor contents response for the given ids.

    Raises:
        ToolException: If the API call fails.
    """
    try:
        return client.get_contents(ids)
    except Exception as e:
        # str(e) is safe even when e.args is empty, where e.args[0] would
        # itself raise IndexError and mask the real failure.
        raise ToolException(str(e)) from e
|
|
|
|
|
@tool
def find_similar(url: str, num_results: Optional[int] = None, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None):
    """Get search results similar to a given URL.

    The url passed in should be a URL returned from `search`

    Args:
        url: Seed URL to find similar pages for.
        num_results: Maximum number of results (service default when None).
        include_domains: Only return results from these domains.
        exclude_domains: Never return results from these domains.

    Returns:
        The Metaphor find-similar response.

    Raises:
        ToolException: If the API call fails.
    """
    try:
        return client.find_similar(
            url,
            num_results=num_results,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
        )
    except Exception as e:
        # str(e) is safe even when e.args is empty, where e.args[0] would
        # itself raise IndexError and mask the real failure.
        raise ToolException(str(e)) from e
|
|
|
|
|
# Crawlbase client used by the page-scraping tools below; the token is read
# from Streamlit's secrets store.
crawling_api_key = st.secrets["CRAWLING_API_KEY"]
api = CrawlingAPI({'token': crawling_api_key})
|
|
|
|
|
@tool
def scrape_page(url: str):
    """Get content of a given URL to process by an agent. in a json format like: dict_keys(['alert', 'title', 'favicon', 'meta', 'content', 'canonical', 'images', 'grouped_images', 'og_images', 'links'])
    """
    # Wrap the Crawlbase call so failures (HTTP errors, missing 'json' payload)
    # surface as ToolException, consistent with the other tools in this module,
    # instead of crashing the agent with a raw exception.
    try:
        response = api.get(url, options={'format': 'json', 'autoparse': 'true', 'scroll': 'true'})
        return response['json']
    except Exception as e:
        raise ToolException(str(e)) from e
|
|
|
|
|
|
|
@tool(return_direct=True)
def scrape_page_and_return_the_content_directly(url: str):
    """Use this tool to directly get content of a given URL without processing it. in a json format like: dict_keys(['alert', 'title', 'favicon', 'meta', 'content', 'canonical', 'images', 'grouped_images', 'og_images', 'links'])
    """
    # Same contract as `scrape_page`, but return_direct=True hands the raw
    # payload straight back to the user. Wrap the Crawlbase call so failures
    # surface as ToolException, consistent with the other tools here.
    try:
        response = api.get(url, options={'format': 'json', 'autoparse': 'true', 'scroll': 'true'})
        return response['json']
    except Exception as e:
        raise ToolException(str(e)) from e
|
|
|
|
|
def get_tools():
    """Build and return the list of tools available to the agent."""
    wiki_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
    # Nudge the agent toward Wikipedia when it needs factual lookups.
    wiki_tool.description += " Prioritise this tool if you want to learn about facts."
    return [
        scrape_page,
        search,
        get_contents,
        find_similar,
        wiki_tool,
        scrape_page_and_return_the_content_directly,
    ]
|
|
|
|