agent-course-final-project

Sleeping

App Files Files Community

renwei2024 committed on Jun 8

Commit

5067f71

1 Parent(s): acb85aa

Add tool text_web_browser and update requirements.txt

Browse files

Files changed (3) hide show

requirements.txt +2 -0
tools/cookies.py +715 -0
tools/text_web_browser.py +567 -0

requirements.txt CHANGED Viewed

@@ -11,3 +11,5 @@ pydub
 SpeechRecognition
 beautifulsoup4
 youtube-transcript-api

 SpeechRecognition
 beautifulsoup4
 youtube-transcript-api
+pathvalidate
+serpapi

tools/cookies.py ADDED Viewed

	@@ -0,0 +1,715 @@

+from requests.cookies import RequestsCookieJar
+COOKIES_LIST = [
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1718884961,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "ST-xuwub9",
+        "path": "/",
+        "sameSite": None,
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "session_logininfo=AFmmF2swRAIgf4gadACOuWOcipI1anW-dakEjtidNLkufnOC8uml7EECIDh2YisqWELDBJPTGUysCucJ3I0wjXxYjVHro1LHrdW0%3AQUQ3MjNmd2Jiajl3OWZYRnpFNnZlWWV5ZGJWZ0hpcmp4LVVPU280bk4zOS03Z0ozZG9fOFhWZ0dXaVo3NG1wTEg1b3hGaG10TFBlaFBnTlJfbER5bEp0aFhoNS1OLVhYNFRZT2F6ajgzOFpDbGhlUjZpMWRETlFFRjFfTTRiM0RnNTROSkdmMTFMVjFic1VuZ2trbGp4aktDa0JJUC1BWDh3",
+    },
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1753004444.745411,
+        "hostOnly": False,
+        "httpOnly": True,
+        "name": "__Secure-YEC",
+        "path": "/",
+        "sameSite": "lax",
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "CgtRVnI5LW1zRHlQVSjbtNCzBjIhCgJGUhIbEhcSFRMLFBUWFwwYGRobHB0eHw4PIBAREiAk",
+    },
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1753434620.050824,
+        "hostOnly": False,
+        "httpOnly": True,
+        "name": "__Secure-3PSID",
+        "path": "/",
+        "sameSite": "no_restriction",
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "g.a000kwibeLUu8Ea9Y-vLun7u3kU5VNJVuMAZl_jdfJaNm50JyDBB4ezJ_bdWu46a7YwObVn44wACgYKAakSARQSFQHGX2MicJcTzecTKH6bHzqU6TMbTxoVAUF8yKqQYK-MoI6Ql3vI2oYTB3E-0076",
+    },
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1750420959.974642,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "SIDCC",
+        "path": "/",
+        "sameSite": None,
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "AKEyXzWQZauHKOo8t87zoEcjaVNIYUX54ohoWXT-tX4aAhEuZzIIptxZAcNkHuG2oDXYL6t-lw",
+    },
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1753434620.050652,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "SID",
+        "path": "/",
+        "sameSite": None,
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "g.a000kwibeLUu8Ea9Y-vLun7u3kU5VNJVuMAZl_jdfJaNm50JyDBB6VHrZcC3gBAsFPbCQ0gF5AACgYKAYkSARQSFQHGX2Mi9kt0gHg5CxCYSkLQGHWaeBoVAUF8yKre_V6r3jZVak6JV4o2Q0FL0076",
+    },
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1750420958.397534,
+        "hostOnly": False,
+        "httpOnly": True,
+        "name": "__Secure-1PSIDTS",
+        "path": "/",
+        "sameSite": None,
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "sidts-CjIB3EgAEkYL2L-GfrEzW5Dfy62S9oefGNLgst78S_986htCnGcfkxECch_9oz-qytSsZBAA",
+    },
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1753433494.44729,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "_ga_M0180HEFCY",
+        "path": "/",
+        "sameSite": None,
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "GS1.1.1718871908.1.0.1718873494.0.0.0",
+    },
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1753434620.050933,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "SAPISID",
+        "path": "/",
+        "sameSite": None,
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "mfeuiC-HraNJ-A03/ASXvCPNJSw7yTFgd6",
+    },
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1750420959.974764,
+        "hostOnly": False,
+        "httpOnly": True,
+        "name": "__Secure-1PSIDCC",
+        "path": "/",
+        "sameSite": None,
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "AKEyXzWHDSoXGCZpZhPxRrnC7B1s8zGIUjeMVyvgtQfsm1fs92lXPtFEI_td9LBUyqVUe0xK",
+    },
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1753434620.050881,
+        "hostOnly": False,
+        "httpOnly": True,
+        "name": "SSID",
+        "path": "/",
+        "sameSite": None,
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "AmlwXHnQvOQ10LVd-",
+    },
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1753434620.050959,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "__Secure-1PAPISID",
+        "path": "/",
+        "sameSite": None,
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "mfeuiC-HraNJ-A03/ASXvCPNJSw7yTFgd6",
+    },
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1753434620.050795,
+        "hostOnly": False,
+        "httpOnly": True,
+        "name": "__Secure-1PSID",
+        "path": "/",
+        "sameSite": None,
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "g.a000kwibeLUu8Ea9Y-vLun7u3kU5VNJVuMAZl_jdfJaNm50JyDBBrlk7lRpKQGywAHEon7WGQAACgYKAQsSARQSFQHGX2MirAmnSRdZl6GPG6KLd4hOihoVAUF8yKoV17Tcj1a_OenIOkf2wBjO0076",
+    },
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1753434620.050993,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "__Secure-3PAPISID",
+        "path": "/",
+        "sameSite": "no_restriction",
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "mfeuiC-HraNJ-A03/ASXvCPNJSw7yTFgd6",
+    },
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1750420959.974815,
+        "hostOnly": False,
+        "httpOnly": True,
+        "name": "__Secure-3PSIDCC",
+        "path": "/",
+        "sameSite": "no_restriction",
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "AKEyXzXM5UjKUEXwSHVmRAIo6hGHA4G63adj3EE1VdNriD0f38jZQbsUKiD4LQbA3BValmTFDg",
+    },
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1750420958.397647,
+        "hostOnly": False,
+        "httpOnly": True,
+        "name": "__Secure-3PSIDTS",
+        "path": "/",
+        "sameSite": "no_restriction",
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "sidts-CjIB3EgAEkYL2L-GfrEzW5Dfy62S9oefGNLgst78S_986htCnGcfkxECch_9oz-qytSsZBAA",
+    },
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1753434620.050908,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "APISID",
+        "path": "/",
+        "sameSite": None,
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "IlQWLPjdNqziwCrV/ANG7Z4x5FF-IBxbZk",
+    },
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1753434620.050855,
+        "hostOnly": False,
+        "httpOnly": True,
+        "name": "HSID",
+        "path": "/",
+        "sameSite": None,
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "AasA7hmRuTFv7vjoq",
+    },
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1753435873.577793,
+        "hostOnly": False,
+        "httpOnly": True,
+        "name": "LOGIN_INFO",
+        "path": "/",
+        "sameSite": "no_restriction",
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "AFmmF2swRAIgf4gadACOuWOcipI1anW-dakEjtidNLkufnOC8uml7EECIDh2YisqWELDBJPTGUysCucJ3I0wjXxYjVHro1LHrdW0:QUQ3MjNmd2Jiajl3OWZYRnpFNnZlWWV5ZGJWZ0hpcmp4LVVPU280bk4zOS03Z0ozZG9fOFhWZ0dXaVo3NG1wTEg1b3hGaG10TFBlaFBnTlJfbER5bEp0aFhoNS1OLVhYNFRZT2F6ajgzOFpDbGhlUjZpMWRETlFFRjFfTTRiM0RnNTROSkdmMTFMVjFic1VuZ2trbGp4aktDa0JJUC1BWDh3",
+    },
+    {
+        "domain": ".youtube.com",
+        "expirationDate": 1753444956.555608,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "PREF",
+        "path": "/",
+        "sameSite": None,
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "f4=4000000&f6=40000000&tz=Europe.Paris&f5=30000&f7=100",
+    },
+]
+COOKIES_LIST += [
+    {
+        "domain": ".www.researchgate.net",
+        "hostOnly": False,
+        "httpOnly": True,
+        "name": "isInstIp",
+        "path": "/",
+        "sameSite": None,
+        "secure": True,
+        "session": True,
+        "storeId": None,
+        "value": "False",
+    },
+    {
+        "domain": ".researchgate.net",
+        "expirationDate": 1734423981,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "__eoi",
+        "path": "/",
+        "sameSite": None,
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "ID=c26f752377373146:T=1718871981:RT=1718884914:S=AA-AfjZw-T_OOX2kW2LLaFzXImgc",
+    },
+    {
+        "domain": ".www.researchgate.net",
+        "expirationDate": 1753444909.646103,
+        "hostOnly": False,
+        "httpOnly": True,
+        "name": "ptc",
+        "path": "/",
+        "sameSite": None,
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "RG1.8947708639250500550.1718872043",
+    },
+    {
+        "domain": ".researchgate.net",
+        "expirationDate": 1750507578,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "euconsent-v2-didomi",
+        "path": "/",
+        "sameSite": "lax",
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "CQAgmoAQAgmoAAHABBENA5EsAP_gAEPgAAYgJ2pB5G5UTWlBIG53YMskIAUFhFBoQEAgAACAAwIBSBIAIIwEAGAAIAgAICACAAIAIBIAIABAGAAAAAAAYIAAIAAIAAAQIAAKIAAAAAAAAgBQAAgIAgggEAAAgEBEABAAgAAAEIIAQNgACgAAACCAAAAAAAABAAAAAAAAQAAAAAAAYCQAAAJIAAAAACAIABAIAAAAAAAAAAAAAAAABBAAIJ2wPIAFAAXABQAFQALgAcAA8ACAAEgALwAZAA0ACIAEcAJgAUgAqgBcADEAGgAPQAfgBEACOAE4AMMAZYA0QBsgDkAHOAO4AfsBBwEIAItARwBHQC6gHUAO2Ae0A_4CHQEXgJ2AUOAo8BT4CpQFqALYAXmAwQBkgDLAGXANjAhCBG8CbAE3gJ1gTtAA.f_wACHwAAAAA",
+    },
+    {
+        "domain": ".researchgate.net",
+        "expirationDate": 1718885236,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "_gat",
+        "path": "/",
+        "sameSite": None,
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "1",
+    },
+    {
+        "domain": "www.researchgate.net",
+        "expirationDate": 1721477183,
+        "hostOnly": True,
+        "httpOnly": False,
+        "name": "_pbjs_userid_consent_data",
+        "path": "/",
+        "sameSite": "lax",
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "3524755945110770",
+    },
+    {
+        "domain": ".researchgate.net",
+        "expirationDate": 1752567981,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "__gads",
+        "path": "/",
+        "sameSite": None,
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "ID=eca2adb88969c830:T=1718871981:RT=1718884914:S=ALNI_MY2qZchynrhWX6hWMlaI87Pcj9riQ",
+    },
+    {
+        "domain": ".researchgate.net",
+        "expirationDate": 1718886709.646173,
+        "hostOnly": False,
+        "httpOnly": True,
+        "name": "__cf_bm",
+        "path": "/",
+        "sameSite": "no_restriction",
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "IkQ_J4ciBzKQduRvjqsfSmQu8UygDWbHeROO5JVccfo-1718884909-1.0.1.1-qvNGEdbfI0HfhFP6kwe7R7mkTqODNhFuKhs72lLly6K2BOPMG3kbahpQFGvPK0U8FUfkznkq65gngd1sWj7sDA",
+    },
+    {
+        "domain": ".researchgate.net",
+        "expirationDate": 1752567981,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "__gpi",
+        "path": "/",
+        "sameSite": None,
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "UID=00000e4e9aa2e6f2:T=1718871981:RT=1718884914:S=ALNI_MYFNrgzkKn7K6Bd2y8hC6GJCvDiSg",
+    },
+    {
+        "domain": ".researchgate.net",
+        "hostOnly": False,
+        "httpOnly": True,
+        "name": "_cfuvid",
+        "path": "/",
+        "sameSite": "no_restriction",
+        "secure": True,
+        "session": True,
+        "storeId": None,
+        "value": "_GPmGZkBymiH3UiqTqzakEpi98br3nfFUWC2_u_wqkc-1718884909785-0.0.1.1-604800000",
+    },
+    {
+        "domain": ".researchgate.net",
+        "expirationDate": 1753445177.271667,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "_ga",
+        "path": "/",
+        "sameSite": None,
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "GA1.1.1525244793.1718885177",
+    },
+    {
+        "domain": ".researchgate.net",
+        "expirationDate": 1753445177.271482,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "_ga_4P31SJ70EJ",
+        "path": "/",
+        "sameSite": None,
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "GS1.1.1718885177.1.0.1718885177.0.0.0",
+    },
+    {
+        "domain": ".researchgate.net",
+        "expirationDate": 1718971576,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "_gid",
+        "path": "/",
+        "sameSite": None,
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "GA1.2.854907463.1718885177",
+    },
+    {
+        "domain": ".www.researchgate.net",
+        "expirationDate": 1750407982.506505,
+        "hostOnly": False,
+        "httpOnly": True,
+        "name": "did",
+        "path": "/",
+        "sameSite": None,
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "1dWLO3C6am8l667Q4VUlBo0O1LI49Qi2Vw21SJEXHavBDYT56DI9007W5rYGVFVH",
+    },
+    {
+        "domain": ".researchgate.net",
+        "expirationDate": 1750507578,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "didomi_token",
+        "path": "/",
+        "sameSite": "lax",
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "eyJ1c2VyX2lkIjoiMTkwMzU4YTUtNWU2My02Y2UzLWJlNzAtZGFjNzVmYjdiY2ExIiwiY3JlYXRlZCI6IjIwMjQtMDYtMjBUMTI6MDY6MTYuODA2WiIsInVwZGF0ZWQiOiIyMDI0LTA2LTIwVDEyOjA2OjE4Ljc4MVoiLCJ2ZW5kb3JzIjp7ImVuYWJsZWQiOlsidHdpdHRlciIsImdvb2dsZSIsImM6bGlua2VkaW4tbWFya2V0aW5nLXNvbHV0aW9ucyIsImM6b3duZXJpcSIsImM6b21uaXR1cmUtYWRvYmUtYW5hbHl0aWNzIiwiYzp0ZWNobm9yYXRpLW1lZGlhIiwiYzppbnRlcmNvbSIsImM6aW50ZW50LWlxIiwiYzppcHJvbSIsImM6bGlua2VkaW4iLCJjOmFtYXpvbmFkdi16Y1hGTEI2WCIsImM6bWVkaWFuZXQtY1V3YUtFNnoiLCJjOmluZGV4ZXhjaC1OWkNRTTY4UCIsImM6emVvdGFwZ21iLWQ3YndtdGp3IiwiYzp0cmlwbGVsaWYtZGRKSDM0clkiLCJjOnJ0YmhvdXNlLWI4Y2RIOHRNIiwiYzptZHByaW1pcy1lYU4yOVdjUCIsImM6bG9vcG1lbGktVGRhWXRCUHEiLCJjOm1hZ25pdGVpbi05d1RZTHFSRCIsImM6Ymlkc3dpdGNoLWQ2N0V3N1c5IiwiYzpvcmFjbGVhZHYtcUhlREptQUwiLCJjOmdvb2dsZWFuYS00VFhuSmlnUiIsImM6bG90YW1lc29sLURIaTdMUmpNIiwiYzpuZXh0bWlsbGUtR0pyZlg4VWMiLCJjOm5yaWNodGVjLXFVVlEyUlFxIiwiYzpicml0ZXBvb2wtQldWeVdHeVUiLCJjOnRhcGFkaW5jLXFxY2tVN1BXIiwiYzppZDV0ZWNobi16Tk1KNGR3ZiIsImM6bWljcm9zb2Z0IiwiYzpwZXJtdXRpdmUtSjdpaHJlTWsiLCJjOm9wZXJhc29mdC1CY1hjRFZKTSIsImM6cG9zdGhvZy1Cakp4RmRGOSJdfSwicHVycG9zZXMiOnsiZW5hYmxlZCI6WyJnZW9sb2NhdGlvbl9kYXRhIiwiZGV2aWNlX2NoYXJhY3RlcmlzdGljcyJdfSwidmVuZG9yc19saSI6eyJlbmFibGVkIjpbImdvb2dsZSIsImM6b3BlcmFzb2Z0LUJjWGNEVkpNIl19LCJ2ZXJzaW9uIjoyLCJhYyI6IkRIU0FvQUZrQWNnQTVnSHFnUUhBeGdCNndEMTRJR0FRTkFqMEJJd0NTY0VyQUtCd1YtZ3MxQmgwREc0R09nQUEuREhTQW9BRmtBY2dBNWdIcWdRSEF4Z0I2d0QxNElHQVFOQWowQkl3Q1NjRXJBS0J3Vi1nczFCaDBERzRHT2dBQSJ9",
+    },
+    {
+        "domain": ".www.researchgate.net",
+        "hostOnly": False,
+        "httpOnly": True,
+        "name": "hasPdpNext",
+        "path": "/",
+        "sameSite": None,
+        "secure": True,
+        "session": True,
+        "storeId": None,
+        "value": "False",
+    },
+    {
+        "domain": ".researchgate.net",
+        "expirationDate": 1750421183,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "ph_phc_ma1XTQyee96N1GML6qUTgLQRiDifnRcE9STiHTZ0CfZ_posthog",
+        "path": "/",
+        "sameSite": "lax",
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "%7B%22distinct_id%22%3A%220190358a-56a1-7313-83b0-d13dddeac787%22%2C%22%24sesid%22%3A%5B1718885183223%2C%220190358a-56a1-7313-83b0-d13b2b87778d%22%2C1718885176993%5D%2C%22%24session_is_sampled%22%3Atrue%7D",
+    },
+    {
+        "domain": ".www.researchgate.net",
+        "hostOnly": False,
+        "httpOnly": True,
+        "name": "sid",
+        "path": "/",
+        "sameSite": None,
+        "secure": True,
+        "session": True,
+        "storeId": None,
+        "value": "qmH5Lc4f0CUJ3zeaxORcV0S8I8V1MuCFZtcIQqPYtv1XPejrbSLAQRbT50PL40TqeKQ1XsQDWt9gtYVzuL80bRmPjw6jn3cQ0ikNqW40maHcQ3JL2Vfa8ZZf0j7p35eJ",
+    },
+]
+COOKIES_LIST += [
+    {
+        "domain": "github.com",
+        "hostOnly": True,
+        "httpOnly": True,
+        "name": "_gh_sess",
+        "path": "/",
+        "sameSite": "lax",
+        "secure": True,
+        "session": True,
+        "storeId": None,
+        "value": "P%2Fmof1avuqwHaUQUIJR%2FZYn7jqbT7lgGuTGjp1BGAFIG5UpNDusEE3b8dRjz0eATE5xPdPjLYFqMs%2FI9AOalKX4YuYfSEEnxCMawU01099b4o9Xzzcv%2BmecrmO0Q8q%2Bdq1h8SIv6nvPP7HzlFesl8ysafb9b%2F0q6dTArKdSOurasza8UgLSYD08ofA50Pcm0IG7CTzF8ZCizrGgGTMi%2F%2B7L3E17jav5PM1Sf2vQKg15Gbg1QIOppJJHzlufgQoZigqFv%2BWznaws0Tt7Y2lSFCw%3D%3D--CJRhqMXJnwOaJgk4--DhUErlL4GdROikEjKD4O9g%3D%3D",
+    },
+    {
+        "domain": ".github.com",
+        "expirationDate": 1750408875.763785,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "_octo",
+        "path": "/",
+        "sameSite": "lax",
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "GH1.1.728652011.1718872875",
+    },
+    {
+        "domain": ".github.com",
+        "expirationDate": 1750408875.763926,
+        "hostOnly": False,
+        "httpOnly": True,
+        "name": "logged_in",
+        "path": "/",
+        "sameSite": "lax",
+        "secure": True,
+        "session": False,
+        "storeId": None,
+        "value": "no",
+    },
+    {
+        "domain": ".github.com",
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "preferred_color_mode",
+        "path": "/",
+        "sameSite": "lax",
+        "secure": True,
+        "session": True,
+        "storeId": None,
+        "value": "dark",
+    },
+    {
+        "domain": ".github.com",
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "tz",
+        "path": "/",
+        "sameSite": "lax",
+        "secure": True,
+        "session": True,
+        "storeId": None,
+        "value": "Europe%2FParis",
+    },
+]
+COOKIES_LIST += [
+    {
+        "domain": ".web.archive.org",
+        "expirationDate": 1718886430,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "_gat",
+        "path": "/web/20201123221659/http://orcid.org/",
+        "sameSite": None,
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "1",
+    },
+    {
+        "domain": ".web.archive.org",
+        "expirationDate": 1718972770,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "_gid",
+        "path": "/web/20201123221659/http://orcid.org/",
+        "sameSite": None,
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "GA1.2.402246368.1606169825",
+    },
+    {
+        "domain": ".web.archive.org",
+        "expirationDate": 1753446370.315621,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "_ga",
+        "path": "/web/20201123221659/http://orcid.org/",
+        "sameSite": None,
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "GA1.2.1301409987.1606169825",
+    },
+    {
+        "domain": ".web.archive.org",
+        "expirationDate": 1750422367,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "_hjid",
+        "path": "/web/20201123221659/http://orcid.org/",
+        "sameSite": "lax",
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "07f80263-a631-4bf4-8ffd-8fc8912085e2",
+    },
+    {
+        "domain": ".web.archive.org",
+        "expirationDate": 1718888167,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "_hjFirstSeen",
+        "path": "/web/20201123221659/http://orcid.org/",
+        "sameSite": "lax",
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "1",
+    },
+]
+COOKIES_LIST += [
+    {
+        "domain": "orcid.org",
+        "hostOnly": True,
+        "httpOnly": False,
+        "name": "AWSELBCORS",
+        "path": "/",
+        "sameSite": "no_restriction",
+        "secure": True,
+        "session": True,
+        "storeId": None,
+        "value": "CBD1D7FF1216388FA48838CBCA4774FD22800B8FB548A40EF92BB0994D5B77A8410307CDEAA69C52236663F2BF89B252C17BC0FCDF790FD59771BDDF6EA8CA4CFD29D8733F",
+    },
+    {
+        "domain": ".orcid.org",
+        "expirationDate": 1753452454.637671,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "_ga_9R61FWK9H5",
+        "path": "/",
+        "sameSite": None,
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "GS1.1.1718892454.1.0.1718892454.0.0.0",
+    },
+    {
+        "domain": ".orcid.org",
+        "expirationDate": 1753452454.63421,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "_ga",
+        "path": "/",
+        "sameSite": None,
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "GA1.1.2021310691.1718892455",
+    },
+    {
+        "domain": "orcid.org",
+        "hostOnly": True,
+        "httpOnly": False,
+        "name": "AWSELB",
+        "path": "/",
+        "sameSite": None,
+        "secure": False,
+        "session": True,
+        "storeId": None,
+        "value": "CBD1D7FF1216388FA48838CBCA4774FD22800B8FB548A40EF92BB0994D5B77A8410307CDEAA69C52236663F2BF89B252C17BC0FCDF790FD59771BDDF6EA8CA4CFD29D8733F",
+    },
+    {
+        "domain": ".orcid.org",
+        "expirationDate": 1750428454,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "OptanonAlertBoxClosed",
+        "path": "/",
+        "sameSite": "lax",
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "2024-06-20T14:07:34.583Z",
+    },
+    {
+        "domain": ".orcid.org",
+        "expirationDate": 1750428454,
+        "hostOnly": False,
+        "httpOnly": False,
+        "name": "OptanonConsent",
+        "path": "/",
+        "sameSite": "lax",
+        "secure": False,
+        "session": False,
+        "storeId": None,
+        "value": "isGpcEnabled=0&datestamp=Thu+Jun+20+2024+16%3A07%3A34+GMT%2B0200+(heure+d%E2%80%99%C3%A9t%C3%A9+d%E2%80%99Europe+centrale)&version=202310.2.0&browserGpcFlag=0&isIABGlobal=False&hosts=&landingPath=NotLandingPage&groups=C0001%3A1%2CC0003%3A1%2CC0002%3A1%2CC0004%3A1",
+    },
+    {
+        "domain": "orcid.org",
+        "hostOnly": True,
+        "httpOnly": False,
+        "name": "XSRF-TOKEN",
+        "path": "/",
+        "sameSite": None,
+        "secure": True,
+        "session": True,
+        "storeId": None,
+        "value": "6957be7a-bcb4-4d59-a522-ea9b6b210ed9",
+    },
+]
+# Build the module-level cookie jar from the exported cookie records in COOKIES_LIST.
+# NOTE(review): the records above look like exported browser session cookies; confirm they are
+# safe to keep in version control (consider loading them from a secret store instead).
+COOKIES = RequestsCookieJar()
+# Only name, value, domain and path are handed to the jar; the other exported fields
+# (expirationDate, secure, httpOnly, sameSite, ...) are not passed along.
+for cookie in COOKIES_LIST:
+    COOKIES.set(cookie["name"], cookie["value"], domain=cookie["domain"], path=cookie["path"])

tools/text_web_browser.py ADDED Viewed

	@@ -0,0 +1,567 @@

+# Shamelessly stolen from Microsoft Autogen team: thanks to them for this great resource!
+# https://github.com/microsoft/autogen/blob/gaia_multiagent_v01_march_1st/autogen/browser_utils.py
+import mimetypes
+import os
+import pathlib
+import re
+import time
+import uuid
+from typing import Any
+from urllib.parse import unquote, urljoin, urlparse
+import pathvalidate
+import requests
+from serpapi import GoogleSearch
+from smolagents import Tool
+from .cookies import COOKIES
+from .mdconvert import FileConversionException, MarkdownConverter, UnsupportedFormatException
+class SimpleTextBrowser:
+    """(In preview) An extremely simple text-based web browser comparable to Lynx. Suitable for Agentic use."""
+    def __init__(
+        self,
+        start_page: str | None = None,
+        viewport_size: int | None = 1024 * 8,
+        downloads_folder: str | None | None = None,
+        serpapi_key: str | None | None = None,
+        request_kwargs: dict[str, Any] | None | None = None,
+    ):
+        self.start_page: str = start_page if start_page else "about:blank"
+        self.viewport_size = viewport_size  # Applies only to the standard uri types
+        self.downloads_folder = downloads_folder
+        self.history: list[tuple[str, float]] = list()
+        self.page_title: str | None = None
+        self.viewport_current_page = 0
+        self.viewport_pages: list[tuple[int, int]] = list()
+        self.set_address(self.start_page)
+        self.serpapi_key = serpapi_key
+        self.request_kwargs = request_kwargs
+        self.request_kwargs["cookies"] = COOKIES
+        self._mdconvert = MarkdownConverter()
+        self._page_content: str = ""
+        self._find_on_page_query: str | None = None
+        self._find_on_page_last_result: int | None = None  # Location of the last result
+    @property
+    def address(self) -> str:
+        """Return the address of the current page."""
+        return self.history[-1][0]
+    def set_address(self, uri_or_path: str, filter_year: int | None = None) -> None:
+        # TODO: Handle anchors
+        self.history.append((uri_or_path, time.time()))
+        # Handle special URIs
+        if uri_or_path == "about:blank":
+            self._set_page_content("")
+        elif uri_or_path.startswith("google:"):
+            self._serpapi_search(uri_or_path[len("google:") :].strip(), filter_year=filter_year)
+        else:
+            if (
+                not uri_or_path.startswith("http:")
+                and not uri_or_path.startswith("https:")
+                and not uri_or_path.startswith("file:")
+            ):
+                if len(self.history) > 1:
+                    prior_address = self.history[-2][0]
+                    uri_or_path = urljoin(prior_address, uri_or_path)
+                    # Update the address with the fully-qualified path
+                    self.history[-1] = (uri_or_path, self.history[-1][1])
+            self._fetch_page(uri_or_path)
+        self.viewport_current_page = 0
+        self.find_on_page_query = None
+        self.find_on_page_viewport = None
+    @property
+    def viewport(self) -> str:
+        """Return the content of the current viewport."""
+        bounds = self.viewport_pages[self.viewport_current_page]
+        return self.page_content[bounds[0] : bounds[1]]
+    @property
+    def page_content(self) -> str:
+        """Return the full text of the current page (all viewports), as last stored by _set_page_content."""
+        return self._page_content
+    def _set_page_content(self, content: str) -> None:
+        """Sets the text content of the current page."""
+        self._page_content = content
+        self._split_pages()
+        if self.viewport_current_page >= len(self.viewport_pages):
+            self.viewport_current_page = len(self.viewport_pages) - 1
+    def page_down(self) -> None:
+        self.viewport_current_page = min(self.viewport_current_page + 1, len(self.viewport_pages) - 1)
+    def page_up(self) -> None:
+        self.viewport_current_page = max(self.viewport_current_page - 1, 0)
+    def find_on_page(self, query: str) -> str | None:
+        """Searches for the query from the current viewport forward, looping back to the start if necessary."""
+        # Did we get here via a previous find_on_page search with the same query?
+        # If so, map to find_next
+        if query == self._find_on_page_query and self.viewport_current_page == self._find_on_page_last_result:
+            return self.find_next()
+        # Ok it's a new search start from the current viewport
+        self._find_on_page_query = query
+        viewport_match = self._find_next_viewport(query, self.viewport_current_page)
+        if viewport_match is None:
+            self._find_on_page_last_result = None
+            return None
+        else:
+            self.viewport_current_page = viewport_match
+            self._find_on_page_last_result = viewport_match
+            return self.viewport
+    def find_next(self) -> str | None:
+        """Scroll to the next viewport that matches the query"""
+        if self._find_on_page_query is None:
+            return None
+        starting_viewport = self._find_on_page_last_result
+        if starting_viewport is None:
+            starting_viewport = 0
+        else:
+            starting_viewport += 1
+            if starting_viewport >= len(self.viewport_pages):
+                starting_viewport = 0
+        viewport_match = self._find_next_viewport(self._find_on_page_query, starting_viewport)
+        if viewport_match is None:
+            self._find_on_page_last_result = None
+            return None
+        else:
+            self.viewport_current_page = viewport_match
+            self._find_on_page_last_result = viewport_match
+            return self.viewport
+    def _find_next_viewport(self, query: str, starting_viewport: int) -> int | None:
+        """Search for matches between the starting viewport looping when reaching the end."""
+        if query is None:
+            return None
+        # Normalize the query, and convert to a regular expression
+        nquery = re.sub(r"\*", "__STAR__", query)
+        nquery = " " + (" ".join(re.split(r"\W+", nquery))).strip() + " "
+        nquery = nquery.replace(" __STAR__ ", "__STAR__ ")  # Merge isolated stars with prior word
+        nquery = nquery.replace("__STAR__", ".*").lower()
+        if nquery.strip() == "":
+            return None
+        idxs = list()
+        idxs.extend(range(starting_viewport, len(self.viewport_pages)))
+        idxs.extend(range(0, starting_viewport))
+        for i in idxs:
+            bounds = self.viewport_pages[i]
+            content = self.page_content[bounds[0] : bounds[1]]
+            # TODO: Remove markdown links and images
+            ncontent = " " + (" ".join(re.split(r"\W+", content))).strip().lower() + " "
+            if re.search(nquery, ncontent):
+                return i
+        return None
+    def visit_page(self, path_or_uri: str, filter_year: int | None = None) -> str:
+        """Navigate to ``path_or_uri`` and return the text of the resulting viewport.
+
+        Both arguments are handed unchanged to ``set_address``; ``filter_year`` only
+        has an effect there for "google:" search addresses.
+        """
+        self.set_address(path_or_uri, filter_year=filter_year)
+        return self.viewport
+    def _split_pages(self) -> None:
+        # Do not split search results
+        if self.address.startswith("google:"):
+            self.viewport_pages = [(0, len(self._page_content))]
+            return
+        # Handle empty pages
+        if len(self._page_content) == 0:
+            self.viewport_pages = [(0, 0)]
+            return
+        # Break the viewport into pages
+        self.viewport_pages = []
+        start_idx = 0
+        while start_idx < len(self._page_content):
+            end_idx = min(start_idx + self.viewport_size, len(self._page_content))  # type: ignore[operator]
+            # Adjust to end on a space
+            while end_idx < len(self._page_content) and self._page_content[end_idx - 1] not in [" ", "\t", "\r", "\n"]:
+                end_idx += 1
+            self.viewport_pages.append((start_idx, end_idx))
+            start_idx = end_idx
+    def _serpapi_search(self, query: str, filter_year: int | None = None) -> None:
+        if self.serpapi_key is None:
+            raise ValueError("Missing SerpAPI key.")
+        params = {
+            "engine": "google",
+            "q": query,
+            "api_key": self.serpapi_key,
+        }
+        if filter_year is not None:
+            params["tbs"] = f"cdr:1,cd_min:01/01/{filter_year},cd_max:12/31/{filter_year}"
+        search = GoogleSearch(params)
+        results = search.get_dict()
+        self.page_title = f"{query} - Search"
+        if "organic_results" not in results.keys():
+            raise Exception(f"No results found for query: '{query}'. Use a less specific query.")
+        if len(results["organic_results"]) == 0:
+            year_filter_message = f" with filter year={filter_year}" if filter_year is not None else ""
+            self._set_page_content(
+                f"No results found for '{query}'{year_filter_message}. Try with a more general query, or remove the year filter."
+            )
+            return
+        def _prev_visit(url):
+            for i in range(len(self.history) - 1, -1, -1):
+                if self.history[i][0] == url:
+                    return f"You previously visited this page {round(time.time() - self.history[i][1])} seconds ago.\n"
+            return ""
+        web_snippets: list[str] = list()
+        idx = 0
+        if "organic_results" in results:
+            for page in results["organic_results"]:
+                idx += 1
+                date_published = ""
+                if "date" in page:
+                    date_published = "\nDate published: " + page["date"]
+                source = ""
+                if "source" in page:
+                    source = "\nSource: " + page["source"]
+                snippet = ""
+                if "snippet" in page:
+                    snippet = "\n" + page["snippet"]
+                redacted_version = f"{idx}. [{page['title']}]({page['link']}){date_published}{source}\n{_prev_visit(page['link'])}{snippet}"
+                redacted_version = redacted_version.replace("Your browser can't play this video.", "")
+                web_snippets.append(redacted_version)
+        content = (
+            f"A Google search for '{query}' found {len(web_snippets)} results:\n\n## Web Results\n"
+            + "\n\n".join(web_snippets)
+        )
+        self._set_page_content(content)
+    def _fetch_page(self, url: str) -> None:
+        download_path = ""
+        try:
+            if url.startswith("file://"):
+                download_path = os.path.normcase(os.path.normpath(unquote(url[7:])))
+                res = self._mdconvert.convert_local(download_path)
+                self.page_title = res.title
+                self._set_page_content(res.text_content)
+            else:
+                # Prepare the request parameters
+                request_kwargs = self.request_kwargs.copy() if self.request_kwargs is not None else {}
+                request_kwargs["stream"] = True
+                # Send a HTTP request to the URL
+                response = requests.get(url, **request_kwargs)
+                response.raise_for_status()
+                # If the HTTP request was successful
+                content_type = response.headers.get("content-type", "")
+                # Text or HTML
+                if "text/" in content_type.lower():
+                    res = self._mdconvert.convert_response(response)
+                    self.page_title = res.title
+                    self._set_page_content(res.text_content)
+                # A download
+                else:
+                    # Try producing a safe filename
+                    fname = None
+                    download_path = None
+                    try:
+                        fname = pathvalidate.sanitize_filename(os.path.basename(urlparse(url).path)).strip()
+                        download_path = os.path.abspath(os.path.join(self.downloads_folder, fname))
+                        suffix = 0
+                        while os.path.exists(download_path) and suffix < 1000:
+                            suffix += 1
+                            base, ext = os.path.splitext(fname)
+                            new_fname = f"{base}__{suffix}{ext}"
+                            download_path = os.path.abspath(os.path.join(self.downloads_folder, new_fname))
+                    except NameError:
+                        pass
+                    # No suitable name, so make one
+                    if fname is None:
+                        extension = mimetypes.guess_extension(content_type)
+                        if extension is None:
+                            extension = ".download"
+                        fname = str(uuid.uuid4()) + extension
+                        download_path = os.path.abspath(os.path.join(self.downloads_folder, fname))
+                    # Open a file for writing
+                    with open(download_path, "wb") as fh:
+                        for chunk in response.iter_content(chunk_size=512):
+                            fh.write(chunk)
+                    # Render it
+                    local_uri = pathlib.Path(download_path).as_uri()
+                    self.set_address(local_uri)
+        except UnsupportedFormatException as e:
+            print(e)
+            self.page_title = ("Download complete.",)
+            self._set_page_content(f"# Download complete\n\nSaved file to '{download_path}'")
+        except FileConversionException as e:
+            print(e)
+            self.page_title = ("Download complete.",)
+            self._set_page_content(f"# Download complete\n\nSaved file to '{download_path}'")
+        except FileNotFoundError:
+            self.page_title = "Error 404"
+            self._set_page_content(f"## Error 404\n\nFile not found: {download_path}")
+        except requests.exceptions.RequestException as request_exception:
+            try:
+                self.page_title = f"Error {response.status_code}"
+                # If the error was rendered in HTML we might as well render it
+                content_type = response.headers.get("content-type", "")
+                if content_type is not None and "text/html" in content_type.lower():
+                    res = self._mdconvert.convert(response)
+                    self.page_title = f"Error {response.status_code}"
+                    self._set_page_content(f"## Error {response.status_code}\n\n{res.text_content}")
+                else:
+                    text = ""
+                    for chunk in response.iter_content(chunk_size=512, decode_unicode=True):
+                        text += chunk
+                    self.page_title = f"Error {response.status_code}"
+                    self._set_page_content(f"## Error {response.status_code}\n\n{text}")
+            except NameError:
+                self.page_title = "Error"
+                self._set_page_content(f"## Error\n\n{str(request_exception)}")
+    def _state(self) -> tuple[str, str]:
+        header = f"Address: {self.address}\n"
+        if self.page_title is not None:
+            header += f"Title: {self.page_title}\n"
+        current_page = self.viewport_current_page
+        total_pages = len(self.viewport_pages)
+        address = self.address
+        for i in range(len(self.history) - 2, -1, -1):  # Start from the second last
+            if self.history[i][0] == address:
+                header += f"You previously visited this page {round(time.time() - self.history[i][1])} seconds ago.\n"
+                break
+        header += f"Viewport position: Showing page {current_page + 1} of {total_pages}.\n"
+        return (header, self.viewport)
+class SearchInformationTool(Tool):
+    name = "web_search"
+    description = "Perform a web search query (think a google search) and returns the search results."
+    inputs = {"query": {"type": "string", "description": "The web search query to perform."}}
+    inputs["filter_year"] = {
+        "type": "string",
+        "description": "[Optional parameter]: filter the search results to only include pages from a specific year. For example, '2020' will only include pages from 2020. Make sure to use this parameter if you're trying to search for articles from a specific date!",
+        "nullable": True,
+    }
+    output_type = "string"
+    def __init__(self, browser):
+        super().__init__()
+        self.browser = browser
+    def forward(self, query: str, filter_year: int | None = None) -> str:
+        self.browser.visit_page(f"google: {query}", filter_year=filter_year)
+        header, content = self.browser._state()
+        return header.strip() + "\n=======================\n" + content
+class VisitTool(Tool):
+    name = "visit_page"
+    description = "Visit a webpage at a given URL and return its text. Given a url to a YouTube video, this returns the transcript."
+    inputs = {"url": {"type": "string", "description": "The relative or absolute url of the webpage to visit."}}
+    output_type = "string"
+    def __init__(self, browser=None):
+        super().__init__()
+        self.browser = browser
+    def forward(self, url: str) -> str:
+        self.browser.visit_page(url)
+        header, content = self.browser._state()
+        return header.strip() + "\n=======================\n" + content
+class DownloadTool(Tool):
+    name = "download_file"
+    description = """
+Download a file at a given URL. The file should be of this format: [".xlsx", ".pptx", ".wav", ".mp3", ".m4a", ".png", ".docx"]
+After using this tool, for further inspection of this page you should return the download path to your manager via final_answer, and they will be able to inspect it.
+DO NOT use this tool for .pdf or .txt or .htm files: for these types of files use visit_page with the file url instead."""
+    inputs = {"url": {"type": "string", "description": "The relative or absolute url of the file to be downloaded."}}
+    output_type = "string"
+    def __init__(self, browser):
+        super().__init__()
+        self.browser = browser
+    def forward(self, url: str) -> str:
+        import requests
+        if "arxiv" in url:
+            url = url.replace("abs", "pdf")
+        response = requests.get(url)
+        content_type = response.headers.get("content-type", "")
+        extension = mimetypes.guess_extension(content_type)
+        if extension and isinstance(extension, str):
+            new_path = f"./downloads/file{extension}"
+        else:
+            new_path = "./downloads/file.object"
+        with open(new_path, "wb") as f:
+            f.write(response.content)
+        if "pdf" in extension or "txt" in extension or "htm" in extension:
+            raise Exception("Do not use this tool for pdf or txt or html files: use visit_page instead.")
+        return f"File was downloaded and saved under path {new_path}."
+class ArchiveSearchTool(Tool):
+    name = "find_archived_url"
+    description = "Given a url, searches the Wayback Machine and returns the archived version of the url that's closest in time to the desired date."
+    inputs = {
+        "url": {"type": "string", "description": "The url you need the archive for."},
+        "date": {
+            "type": "string",
+            "description": "The date that you want to find the archive for. Give this date in the format 'YYYYMMDD', for instance '27 June 2008' is written as '20080627'.",
+        },
+    }
+    output_type = "string"
+    def __init__(self, browser=None):
+        super().__init__()
+        self.browser = browser
+    def forward(self, url, date) -> str:
+        import requests
+        no_timestamp_url = f"https://archive.org/wayback/available?url={url}"
+        archive_url = no_timestamp_url + f"&timestamp={date}"
+        response = requests.get(archive_url).json()
+        response_notimestamp = requests.get(no_timestamp_url).json()
+        if "archived_snapshots" in response and "closest" in response["archived_snapshots"]:
+            closest = response["archived_snapshots"]["closest"]
+            print("Archive found!", closest)
+        elif "archived_snapshots" in response_notimestamp and "closest" in response_notimestamp["archived_snapshots"]:
+            closest = response_notimestamp["archived_snapshots"]["closest"]
+            print("Archive found!", closest)
+        else:
+            raise Exception(f"Your {url=} was not archived on Wayback Machine, try a different url.")
+        target_url = closest["url"]
+        self.browser.visit_page(target_url)
+        header, content = self.browser._state()
+        return (
+            f"Web archive for url {url}, snapshot taken at date {closest['timestamp'][:8]}:\n"
+            + header.strip()
+            + "\n=======================\n"
+            + content
+        )
+class PageUpTool(Tool):
+    name = "page_up"
+    description = "Scroll the viewport UP one page-length in the current webpage and return the new viewport content."
+    inputs = {}
+    output_type = "string"
+    def __init__(self, browser=None):
+        super().__init__()
+        self.browser = browser
+    def forward(self) -> str:
+        self.browser.page_up()
+        header, content = self.browser._state()
+        return header.strip() + "\n=======================\n" + content
+class PageDownTool(Tool):
+    name = "page_down"
+    description = (
+        "Scroll the viewport DOWN one page-length in the current webpage and return the new viewport content."
+    )
+    inputs = {}
+    output_type = "string"
+    def __init__(self, browser=None):
+        super().__init__()
+        self.browser = browser
+    def forward(self) -> str:
+        self.browser.page_down()
+        header, content = self.browser._state()
+        return header.strip() + "\n=======================\n" + content
+class FinderTool(Tool):
+    name = "find_on_page_ctrl_f"
+    description = "Scroll the viewport to the first occurrence of the search string. This is equivalent to Ctrl+F."
+    inputs = {
+        "search_string": {
+            "type": "string",
+            "description": "The string to search for on the page. This search string supports wildcards like '*'",
+        }
+    }
+    output_type = "string"
+    def __init__(self, browser=None):
+        super().__init__()
+        self.browser = browser
+    def forward(self, search_string: str) -> str:
+        find_result = self.browser.find_on_page(search_string)
+        header, content = self.browser._state()
+        if find_result is None:
+            return (
+                header.strip()
+                + f"\n=======================\nThe search string '{search_string}' was not found on this page."
+            )
+        else:
+            return header.strip() + "\n=======================\n" + content
+class FindNextTool(Tool):
+    name = "find_next"
+    description = "Scroll the viewport to next occurrence of the search string. This is equivalent to finding the next match in a Ctrl+F search."
+    inputs = {}
+    output_type = "string"
+    def __init__(self, browser=None):
+        super().__init__()
+        self.browser = browser
+    def forward(self) -> str:
+        find_result = self.browser.find_next()
+        header, content = self.browser._state()
+        if find_result is None:
+            return header.strip() + "\n=======================\nThe search string was not found on this page."
+        else:
+            return header.strip() + "\n=======================\n" + content