"""Tests for ``OCRDataCollector``: random text selection and image submission."""

import io

import numpy as np
import pytest
from PIL import Image

from app import OCRDataCollector, sentences

# Word cap passed to ``get_random_text_block`` throughout these tests.
_MAX_WORDS = 50

# Number of random draws used by the probabilistic "blocks differ" checks.
# A single pair of draws can collide by chance; several draws make a false
# failure far less likely.
_SAMPLE_COUNT = 5


@pytest.fixture
def collector():
    """Provide a fresh ``OCRDataCollector`` for each test."""
    return OCRDataCollector()


def test_get_random_text_block(collector):
    """A text block is a non-empty string built from the known sentences."""
    text_block = collector.get_random_text_block(max_words=_MAX_WORDS)

    # Test that we get a non-empty string.
    assert isinstance(text_block, str)
    assert len(text_block) > 0

    # Test that the text block contains content from our sentences.
    assert any(sentence in text_block for sentence in sentences)

    # Test that we get different blocks (probabilistic, but very likely).
    blocks = [
        collector.get_random_text_block(max_words=_MAX_WORDS)
        for _ in range(_SAMPLE_COUNT)
    ]
    assert len(set(blocks)) > 1, "Random blocks should be different"


def test_skip_text(collector):
    """Requesting new text yields non-empty strings that differ from the current one."""
    current_text = collector.get_random_text_block(max_words=_MAX_WORDS)

    # Draw several replacements instead of one: comparing a single pair of
    # random blocks can fail spuriously when both draws happen to match.
    candidates = [
        collector.get_random_text_block(max_words=_MAX_WORDS)
        for _ in range(_SAMPLE_COUNT)
    ]

    for new_text in candidates:
        assert isinstance(new_text, str)
        assert len(new_text) > 0

    # Still probabilistic, but very likely to hold across several draws.
    assert any(new_text != current_text for new_text in candidates), (
        "Skipping should eventually produce a different text block"
    )


def test_submit_image(collector):
    """Submitting with or without an image returns new text and stores no pair."""
    # Create a dummy all-white 100x100 RGB test image from a numpy array.
    img_array = np.full((100, 100, 3), 255, dtype=np.uint8)
    test_image = Image.fromarray(img_array)

    # The text block the image is submitted against.
    current_text = collector.get_random_text_block(max_words=_MAX_WORDS)

    # Test submission with a valid image.
    new_text = collector.submit_image(test_image, current_text)
    assert isinstance(new_text, str)
    assert len(new_text) > 0
    # Should not have added to collected_pairs since no username provided.
    assert len(collector.collected_pairs) == 0

    # Test submission with no image.
    new_text = collector.submit_image(None, current_text)
    assert isinstance(new_text, str)
    assert len(new_text) > 0
    assert len(collector.collected_pairs) == 0