File size: 4,379 Bytes
03c0888
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import os
import sys
import pytest
import asyncio
import base64
from PIL import Image
import io

# Append the directory one level above this file's folder to sys.path so the
# crawl4ai import below can find the package there.
_here = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(_here)
sys.path.append(parent_dir)

from crawl4ai.async_webcrawler import AsyncWebCrawler

@pytest.mark.asyncio
async def test_basic_screenshot():
    """Crawl a static page with ``screenshot=True`` and check the capture.

    The crawl must report success, and the returned screenshot must
    base64-decode to data that PIL identifies as PNG.
    """
    target = "https://example.com"  # a static website
    async with AsyncWebCrawler(verbose=True) as crawler:
        outcome = await crawler.arun(
            url=target,
            bypass_cache=True,
            screenshot=True,
        )

        assert outcome.success
        assert outcome.screenshot is not None

        # Decode the base64 payload and let PIL identify the image format.
        png_stream = io.BytesIO(base64.b64decode(outcome.screenshot))
        assert Image.open(png_stream).format == "PNG"

@pytest.mark.asyncio
async def test_screenshot_with_wait_for():
    """Capture a screenshot while passing a CSS ``wait_for`` condition.

    Passes ``wait_for="css:#content"`` to the crawl, then checks that the
    crawl succeeded and that the base64 screenshot decodes to a PNG.
    """
    crawl_options = {
        "url": "https://www.youtube.com",  # a website with dynamic content
        "bypass_cache": True,
        "screenshot": True,
        "wait_for": "css:#content",  # wait for the main content to load
    }
    async with AsyncWebCrawler(verbose=True) as crawler:
        outcome = await crawler.arun(**crawl_options)

        assert outcome.success
        assert outcome.screenshot is not None

        # Decode the base64 payload and let PIL identify the image format.
        png_stream = io.BytesIO(base64.b64decode(outcome.screenshot))
        assert Image.open(png_stream).format == "PNG"

        # Possible extensions: check the image dimensions, or use image
        # recognition to confirm that specific elements were rendered.

@pytest.mark.asyncio
async def test_screenshot_with_js_wait_for():
    """Capture a screenshot while passing a JavaScript ``wait_for`` condition.

    The ``js:`` condition checks that ``#nav-logo-sprites`` is present in the
    document. The test then verifies that the crawl succeeded and that the
    base64 screenshot decodes to a PNG.
    """
    target = "https://www.amazon.com"
    js_condition = "js:() => document.querySelector('#nav-logo-sprites') !== null"

    async with AsyncWebCrawler(verbose=True) as crawler:
        outcome = await crawler.arun(
            url=target,
            bypass_cache=True,
            screenshot=True,
            wait_for=js_condition,
        )

        assert outcome.success
        assert outcome.screenshot is not None

        # Decode the base64 payload and let PIL identify the image format.
        raw_bytes = base64.b64decode(outcome.screenshot)
        decoded = Image.open(io.BytesIO(raw_bytes))
        assert decoded.format == "PNG"

@pytest.mark.asyncio
async def test_screenshot_without_wait_for():
    """Capture a screenshot of a dynamic site without any ``wait_for``.

    Only ``screenshot=True`` and ``bypass_cache=True`` are passed. The crawl
    must succeed and the base64 screenshot must decode to a PNG.
    """
    target = "https://www.nytimes.com"  # a website with lots of dynamic content
    async with AsyncWebCrawler(verbose=True) as crawler:
        outcome = await crawler.arun(
            url=target,
            bypass_cache=True,
            screenshot=True,
        )

        assert outcome.success
        assert outcome.screenshot is not None

        # Decode the base64 payload and let PIL identify the image format.
        png_stream = io.BytesIO(base64.b64decode(outcome.screenshot))
        assert Image.open(png_stream).format == "PNG"

@pytest.mark.asyncio
async def test_screenshot_comparison():
    """Compare screenshots of one URL taken without and with ``wait_for``.

    Crawls the same page twice through the same crawler, first without a wait
    condition and then with a CSS one, and asserts that the second image is at
    least as wide and at least as tall as the first.
    """

    def _open_screenshot(encoded):
        # Base64 text -> raw bytes -> PIL image.
        return Image.open(io.BytesIO(base64.b64decode(encoded)))

    target = "https://www.reddit.com"
    css_condition = "css:#SHORTCUT_FOCUSABLE_DIV"

    async with AsyncWebCrawler(verbose=True) as crawler:
        # First pass: no wait condition.
        plain = await crawler.arun(url=target, bypass_cache=True, screenshot=True)

        # Second pass: same page, with the CSS wait condition.
        waited = await crawler.arun(
            url=target,
            bypass_cache=True,
            screenshot=True,
            wait_for=css_condition,
        )

        assert plain.success
        assert waited.success
        assert plain.screenshot is not None
        assert waited.screenshot is not None

        plain_width, plain_height = _open_screenshot(plain.screenshot).size
        waited_width, waited_height = _open_screenshot(waited.screenshot).size

        # Only the dimensions are compared. A real-world check might use a
        # more sophisticated image comparison technique.
        assert waited_width >= plain_width
        assert waited_height >= plain_height

# Entry point for debugging: run this file's tests directly with verbose output.
if __name__ == "__main__":
    # pytest.main() returns its exit code rather than exiting the process.
    # Forward it so a failing run gives the script a non-zero exit status.
    sys.exit(pytest.main([__file__, "-v"]))