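"""
Standalone test script for crawl4ai's file-download support (AsyncWebCrawler with
accept_downloads). It exercises basic downloads, persistent browser contexts,
multiple simultaneous downloads, the chromium/firefox/webkit browser types, and a
few edge cases, then prints a pass/fail summary.

Note: this is a live-network script (it drives https://www.python.org/downloads/)
and assumes crawl4ai's browser dependencies (Playwright browsers) are installed.
"""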
import os
import sys
import asyncio
import shutil
from typing import List
import tempfile
import time

# Add the parent directory to the Python path
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir)

from crawl4ai.async_webcrawler import AsyncWebCrawler

class TestDownloads:
    def __init__(self):
        self.temp_dir = tempfile.mkdtemp(prefix="crawl4ai_test_")
        self.download_dir = os.path.join(self.temp_dir, "downloads")
        os.makedirs(self.download_dir, exist_ok=True)
        self.results: List[str] = []

    def cleanup(self):
        shutil.rmtree(self.temp_dir)

    def log_result(self, test_name: str, success: bool, message: str = ""):
        result = f"{'✅' if success else '❌'} {test_name}: {message}"
        self.results.append(result)
        print(result)

    async def test_basic_download(self):
        """Test basic file download functionality"""
        try:
            async with AsyncWebCrawler(
                accept_downloads=True,
                downloads_path=self.download_dir,
                verbose=True
            ) as crawler:
                # Python.org downloads page typically has stable download links
                result = await crawler.arun(
                    url="https://www.python.org/downloads/",
                    js_code="""
                        // Click first download link
                        const downloadLink = document.querySelector('a[href$=".exe"]');
                        if (downloadLink) downloadLink.click();
                    """
                )

                success = result.downloaded_files is not None and len(result.downloaded_files) > 0
                self.log_result(
                    "Basic Download",
                    success,
                    f"Downloaded {len(result.downloaded_files or [])} files" if success else "No files downloaded"
                )
        except Exception as e:
            self.log_result("Basic Download", False, str(e))

    async def test_persistent_context_download(self):
        """Test downloads with persistent context"""
        try:
            user_data_dir = os.path.join(self.temp_dir, "user_data")
            os.makedirs(user_data_dir, exist_ok=True)

            async with AsyncWebCrawler(
                accept_downloads=True,
                downloads_path=self.download_dir,
                use_persistent_context=True,
                user_data_dir=user_data_dir,
                verbose=True
            ) as crawler:
                result = await crawler.arun(
                    url="https://www.python.org/downloads/",
                    js_code="""
                        const downloadLink = document.querySelector('a[href$=".exe"]');
                        if (downloadLink) downloadLink.click();
                    """
                )

                success = result.downloaded_files is not None and len(result.downloaded_files) > 0
                self.log_result(
                    "Persistent Context Download",
                    success,
                    f"Downloaded {len(result.downloaded_files or [])} files" if success else "No files downloaded"
                )
        except Exception as e:
            self.log_result("Persistent Context Download", False, str(e))

    async def test_multiple_downloads(self):
        """Test multiple simultaneous downloads"""
        try:
            async with AsyncWebCrawler(
                accept_downloads=True,
                downloads_path=self.download_dir,
                verbose=True
            ) as crawler:
                result = await crawler.arun(
                    url="https://www.python.org/downloads/",
                    js_code="""
                        // Click multiple download links
                        const downloadLinks = document.querySelectorAll('a[href$=".exe"]');
                        downloadLinks.forEach(link => link.click());
                    """
                )

                success = result.downloaded_files is not None and len(result.downloaded_files) > 1
                self.log_result(
                    "Multiple Downloads",
                    success,
                    f"Downloaded {len(result.downloaded_files or [])} files" if success else "Not enough files downloaded"
                )
        except Exception as e:
            self.log_result("Multiple Downloads", False, str(e))

    async def test_different_browsers(self):
        """Test downloads across different browser types"""
        browsers = ["chromium", "firefox", "webkit"]

        for browser_type in browsers:
            try:
                async with AsyncWebCrawler(
                    accept_downloads=True,
                    downloads_path=self.download_dir,
                    browser_type=browser_type,
                    verbose=True
                ) as crawler:
                    result = await crawler.arun(
                        url="https://www.python.org/downloads/",
                        js_code="""
                            const downloadLink = document.querySelector('a[href$=".exe"]');
                            if (downloadLink) downloadLink.click();
                        """
                    )

                    success = result.downloaded_files is not None and len(result.downloaded_files) > 0
                    self.log_result(
                        f"{browser_type.title()} Download",
                        success,
                        f"Downloaded {len(result.downloaded_files or [])} files" if success else "No files downloaded"
                    )
            except Exception as e:
                self.log_result(f"{browser_type.title()} Download", False, str(e))

    async def test_edge_cases(self):
        """Test various edge cases"""
        # Test 1: Downloads without specifying download path
        try:
            async with AsyncWebCrawler(
                accept_downloads=True,
                verbose=True
            ) as crawler:
                result = await crawler.arun(
                    url="https://www.python.org/downloads/",
                    js_code="document.querySelector('a[href$=\".exe\"]').click()"
                )
                self.log_result(
                    "Default Download Path",
                    True,
                    f"Downloaded to default path: {result.downloaded_files[0] if result.downloaded_files else 'None'}"
                )
        except Exception as e:
            self.log_result("Default Download Path", False, str(e))

        # Test 2: Downloads with invalid path
        try:
            async with AsyncWebCrawler(
                accept_downloads=True,
                downloads_path="/invalid/path/that/doesnt/exist",
                verbose=True
            ) as crawler:
                result = await crawler.arun(
                    url="https://www.python.org/downloads/",
                    js_code="document.querySelector('a[href$=\".exe\"]').click()"
                )
                self.log_result("Invalid Download Path", False, "Should have raised an error")
        except Exception as e:
            self.log_result("Invalid Download Path", True, "Correctly handled invalid path")

        # Test 3: Download with accept_downloads=False
        try:
            async with AsyncWebCrawler(
                accept_downloads=False,
                verbose=True
            ) as crawler:
                result = await crawler.arun(
                    url="https://www.python.org/downloads/",
                    js_code="document.querySelector('a[href$=\".exe\"]').click()"
                )
                success = result.downloaded_files is None
                self.log_result(
                    "Disabled Downloads",
                    success,
                    "Correctly ignored downloads" if success else "Unexpectedly downloaded files"
                )
        except Exception as e:
            self.log_result("Disabled Downloads", False, str(e))

    async def run_all_tests(self):
        """Run all test cases"""
        print("\n🧪 Running Download Tests...\n")

        test_methods = [
            self.test_basic_download,
            self.test_persistent_context_download,
            self.test_multiple_downloads,
            self.test_different_browsers,
            self.test_edge_cases
        ]

        for test in test_methods:
            print(f"\n📝 Running {test.__doc__}...")
            await test()
            await asyncio.sleep(2)  # Brief pause between tests

        print("\n📊 Test Results Summary:")
        for result in self.results:
            print(result)

        successes = len([r for r in self.results if '✅' in r])
        total = len(self.results)
        print(f"\nTotal: {successes}/{total} tests passed")

        self.cleanup()

async def main():
    tester = TestDownloads()
    await tester.run_all_tests()


if __name__ == "__main__":
    asyncio.run(main())
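# Usage sketch (filename and location assumed; the sys.path shim above expects the
# script to live one directory below the repository root, e.g. in tests/):
#   python tests/test_downloads.py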