"""Tests for ``InputObservation`` and the mock upload helpers they rely on."""

import datetime
from io import BytesIO
from typing import Protocol, runtime_checkable
from unittest.mock import MagicMock, patch

import numpy as np
import pytest

from input.input_observation import InputObservation

# Imports kept for reference only (not needed by the tests below):
# from PIL import Image
# from streamlit.runtime.uploaded_file_manager import UploadedFile  # for type hinting
# from typing import List, Union


@runtime_checkable
class UploadedFile(Protocol):
    """Structural type listing the upload attributes/methods used for hinting.

    Declared locally so the tests do not need the real upload class
    (see the commented-out streamlit import above).
    """

    name: str
    size: int
    type: str
    _file_urls: list

    def getvalue(self) -> bytes: ...

    def read(self) -> bytes: ...


class MockUploadedFile(BytesIO):
    """In-memory file (a ``BytesIO``) carrying upload metadata attributes.

    Args:
        initial_bytes: Content of the fake file.
        name: File name reported by the mock (keyword-only).
        size: File size reported by the mock (keyword-only); it is stored as
            given and is not derived from ``initial_bytes``.
        type: MIME type reported by the mock (keyword-only). The parameter
            name shadows the builtin, but it is part of the keyword interface
            used by callers, so it is kept.
    """

    def __init__(
        self,
        initial_bytes: bytes,
        *,  # enforce keyword-only arguments from here on
        name: str,
        size: int,
        type: str,
    ):
        super().__init__(initial_bytes)
        self.name = name
        self.size = size
        self.type = type
        self._file_urls = [None]


@pytest.fixture
def mock_uploadedFile():
    """Provide a ``MagicMock`` subclass that hands out a ``MockUploadedFile``.

    The fixture returns the class itself (not an instance). Instantiating it
    with optional ``name`` / ``size`` / ``type`` keywords builds a
    ``MockUploadedFile`` holding ``b"test data"``; calling ``get_data()`` on
    the instance returns that file object.
    """

    class MockGUIClass(MagicMock):
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            name = kwargs.get("name", "image2.jpg")
            size = kwargs.get("size", 123456)
            # local renamed so it does not shadow the builtin ``type``
            mime_type = kwargs.get("type", "image/jpeg")
            self.bytes_io = MockUploadedFile(
                b"test data", name=name, size=size, type=mime_type
            )
            self.get_data = MagicMock(return_value=self.bytes_io)

    return MockGUIClass


# let's first generate a test for the mock_uploadedFile fixture and the
# MockUploadedFile class
# - test with valid input
def test_mock_uploaded_file(mock_uploadedFile):
    """The mock file is a ``BytesIO`` exposing the requested/default metadata."""
    # setup values for the test (all valid)
    image_name = "test_image.jpg"
    mock_file = mock_uploadedFile(name=image_name).get_data()

    assert isinstance(mock_file, BytesIO)
    assert mock_file.name == image_name
    assert mock_file.size == 123456
    assert mock_file.type == "image/jpeg"


# now we move on to test the class InputObservation
# - with valid input
# - with invalid input
# - with missing input
def test_input_observation_valid(mock_uploadedFile):
    """An observation built from valid inputs exposes those inputs unchanged."""
    # image: ndarray
    # lat, lon: float
    # author_email: str
    # date, time: datetime.date, datetime.time
    # uploaded_file: UploadedFile (need to mock this)
    # image_md5: str

    # setup values for the test (all valid)
    author_email = "test@example.com"
    image_name = "test_image.jpg"
    mock_file = mock_uploadedFile(name=image_name).get_data()

    _date = "2023-10-10"
    _time = "10:10:10"
    _timezone = "+04:00"
    image_datetime_raw = _date + " " + _time + " " + _timezone
    dt = datetime.datetime.strptime(image_datetime_raw, "%Y-%m-%d %H:%M:%S %z")
    date = dt.date()
    time = dt.time()
    tz_str = dt.strftime("%z")

    # random uint8 image; randint's upper bound is exclusive, so 256 is needed
    # for the value 255 to be possible
    image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
    image_md5 = "d1d2515e6f6ac4c5ca6dd739d5143cd4"  # 32 hex chars.

    obs = InputObservation(
        image=image,
        latitude=12.34,
        longitude=56.78,
        author_email=author_email,
        time=time,
        date=date,
        timezone=tz_str,
        uploaded_file=mock_file,
        image_md5=image_md5,
    )

    assert isinstance(obs.image, np.ndarray)
    assert (obs.image == image).all()

    assert obs.latitude == 12.34
    assert obs.longitude == 56.78
    assert obs.author_email == author_email
    assert isinstance(obs.date, datetime.date)
    assert isinstance(obs.time, datetime.time)
    assert str(obs.date) == "2023-10-10"
    assert str(obs.time) == "10:10:10"
    assert obs.timezone == tz_str

    assert obs.uploaded_file.name == image_name
    assert obs.uploaded_file.size == 123456
    assert obs.uploaded_file.type == "image/jpeg"

    assert isinstance(obs.uploaded_file, BytesIO)
    # assert isinstance(obs.uploaded_file, MockUploadedFile)
    # is there any point in checking the type of the mock?
# A list of tuples: (key of ``valid_inputs`` in the test below, expected error
# type). For each tuple the test builds a dict of all-valid inputs, replaces
# that one key with an invalid value, and asserts that constructing the
# observation raises the expected error type.
invalid_input_scenarios = [
    ("author_email", TypeError),
    ("image_name", TypeError),
    ("uploaded_file", TypeError),
    ("date", TypeError),
    ("time", TypeError),
    ("image", TypeError),
    ("image_md5", TypeError),
]


@pytest.mark.parametrize("key, error_type", invalid_input_scenarios)
def test_input_observation_invalid(key, error_type, mock_uploadedFile):
    """Constructing with one invalid (or ``None``) input raises ``error_type``.

    Correct datatypes are:
      - image: ndarray
      - lat, lon: float
      - author_email: str
      - date, time: datetime.date, datetime.time
      - uploaded_file: UploadedFile (need to mock this)
      - image_md5: str

    The most critical / likely to go wrong would presumably be:
      - date, time (strings not datetime objects)
      - lat, lon (strings not numbers)
      - image (not ndarray, maybe accidentally a PIL object or the filename)
      - uploaded_file (not UploadedFile, maybe a string, or maybe the ndarray)

    Each run takes the parametrized key from the bad set and every other key
    from the good set, then asserts that construction fails.
    """
    # NOTE(review): "image_name" is passed to the constructor only in this
    # test (it is commented out in the equality test and absent from the
    # fixtures). If InputObservation does not accept that keyword, every case
    # here raises TypeError for that reason alone and the per-key check is
    # vacuous -- confirm against the constructor signature.

    # set up the good and bad inputs
    _date = "2023-10-10"
    _time = "10:10:10"
    image_datetime_raw = _date + " " + _time
    fname = "test_image.jpg"
    # randint's upper bound is exclusive: 256 allows the full uint8 range
    image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
    dt_ok = datetime.datetime.strptime(image_datetime_raw, "%Y-%m-%d %H:%M:%S")

    valid_inputs = {
        "author_email": "test@example.com",
        "image_name": "test_image.jpg",
        "uploaded_file": mock_uploadedFile(name=fname).get_data(),
        "date": dt_ok.date(),
        "time": dt_ok.time(),
        "image": image,
        "image_md5": "d1d2515e6f6ac4c5ca6dd739d5143cd4",  # 32 hex chars.
    }
    invalid_inputs = {
        "author_email": "@example",
        "image_name": 45,
        "uploaded_file": image,
        "date": _date,
        "time": _time,
        "image": fname,
        "image_md5": 45643,
    }

    # a valid set of inputs, with the target key substituted by something invalid
    inputs = valid_inputs.copy()
    inputs[key] = invalid_inputs[key]
    with pytest.raises(error_type):
        InputObservation(**inputs)

    # now test the same key set to None
    inputs = valid_inputs.copy()
    inputs[key] = None
    with pytest.raises(error_type):
        InputObservation(**inputs)


# We can take a similar approach to test equality.
# Here, construct two dicts, each with valid inputs but all elements different.
# Loop over the keys, and construct two InputObservations that differ on that
# key only. Assert the expected boolean (it is the diff function that prints a
# message; here we only check equality).
# We currently expect differences on time to be ignored, hence the xfail.
inequality_keys = [
    ("author_email", False),
    ("uploaded_file", False),
    ("date", False),
    # ("time", True),
    pytest.param(
        "time",
        False,
        marks=pytest.mark.xfail(reason="Time is currently ignored in __eq__"),
    ),
    ("image", False),
    ("image_md5", False),
]


@pytest.mark.parametrize("key, expect_equality", inequality_keys)
def test_input_observation_equality(key, expect_equality, mock_uploadedFile):
    """Two observations differing in exactly one input compare as expected."""
    # set up the two sets of good inputs
    _date1 = "2023-10-10"
    _time1 = "10:10:10"
    image_datetime_raw1 = _date1 + " " + _time1
    fname1 = "test_image.jpg"
    image1 = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
    dt1 = datetime.datetime.strptime(image_datetime_raw1, "%Y-%m-%d %H:%M:%S")

    _date2 = "2023-10-11"
    _time2 = "12:13:14"
    image_datetime_raw2 = _date2 + " " + _time2
    # the second set must differ from the first in every element, file name included
    fname2 = "another.jpg"
    image2 = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
    dt2 = datetime.datetime.strptime(image_datetime_raw2, "%Y-%m-%d %H:%M:%S")

    valid_inputs1 = {
        "author_email": "test@example.com",
        # "image_name": "test_image.jpg",
        "uploaded_file": mock_uploadedFile(name=fname1).get_data(),
        "date": dt1.date(),
        "time": dt1.time(),
        "image": image1,
        "image_md5": "d1d2515e6f6ac4c5ca6dd739d5143cd4",  # 32 hex chars.
    }
    valid_inputs2 = {
        "author_email": "example@whales.org",
        # "image_name": "another.jpg",
        "uploaded_file": mock_uploadedFile(name=fname2).get_data(),
        "date": dt2.date(),
        "time": dt2.time(),
        "image": image2,
        "image_md5": "cdb235587bdee5915d6ccfa52ca9f3ac",  # 32 hex chars.
    }

    # identical to the first set except for the single key under test
    nearly_same_inputs = valid_inputs1.copy()
    nearly_same_inputs[key] = valid_inputs2[key]
    obs1 = InputObservation(**valid_inputs1)
    obs2 = InputObservation(**nearly_same_inputs)

    if expect_equality:
        assert obs1 == obs2
    else:
        assert obs1 != obs2


# now let's test the setter methods
# (set_top_predictions, set_selected_class, set_class_overriden)
# ideally we get a fixture that produces a good / valid InputObservation object
# and from there, just test the setters + their expected changes / side effects


@pytest.fixture
def good_datadict_for_input_observation(mock_uploadedFile) -> dict:
    """Return keyword arguments describing one fully valid observation."""
    _date = "2023-10-10"
    _time = "10:10:10"
    _timezone = "+04:00"
    image_datetime_raw = _date + " " + _time + " " + _timezone
    fname = "test_image.jpg"
    image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)

    dt_ok = datetime.datetime.strptime(image_datetime_raw, "%Y-%m-%d %H:%M:%S %z")
    tz_str = dt_ok.strftime("%z")

    valid_inputs = {
        "author_email": "test@example.com",
        "uploaded_file": mock_uploadedFile(name=fname).get_data(),
        "date": dt_ok.date(),
        "time": dt_ok.time(),
        "timezone": tz_str,
        "image": image,
        "image_md5": "d1d2515e6f6ac4c5ca6dd739d5143cd4",  # 32 hex chars.
        "image_datetime_raw": image_datetime_raw,
        "latitude": 12.34,
        "longitude": 56.78,
    }
    return valid_inputs


@pytest.fixture
def good_input_observation(good_datadict_for_input_observation) -> InputObservation:
    """Return an ``InputObservation`` built from the valid data dict fixture."""
    observation = InputObservation(**good_datadict_for_input_observation)
    return observation


def test_input_observation__set_top_predictions_populated(good_input_observation):
    """A non-empty prediction list is stored and its first entry is selected."""
    obs = good_input_observation

    # before setting, expect empty list
    assert obs.top_predictions == []
    assert obs.selected_class is None

    # set >0,
    # - expect to find the same list in the property/attribute
    # - expect to find the first element in the selected_class
    top_predictions = ["beluga", "blue_whale", "common_dolphin"]
    obs.set_top_predictions(top_predictions)

    assert len(obs.top_predictions) == 3
    assert obs.top_predictions == top_predictions
    assert obs.selected_class == "beluga"


def test_input_observation__set_top_predictions_unpopulated(good_input_observation):
    """An empty prediction list leaves the selected class unset."""
    obs = good_input_observation

    # before setting, expect empty list
    assert obs.top_predictions == []
    assert obs.selected_class is None

    # set to empty list,
    # - expect to find the same list in the property/attribute
    # - expect to find selected_class to be None
    top_predictions = []
    obs.set_top_predictions(top_predictions)

    assert len(obs.top_predictions) == 0
    assert obs.top_predictions == []
    assert obs.selected_class is None


def test_input_observation__set_selected_class_default(good_input_observation):
    """Selecting the top prediction does not mark the class as overridden."""
    obs = good_input_observation

    # before setting, expect empty list
    assert obs.top_predictions == []
    assert obs.selected_class is None
    # compared with ==: the flag must equal the boolean, not merely be falsy
    assert obs.class_overriden == False

    # set >0, and then set_selected_class to the first element
    # - expect to find the same list in the property/attribute
    # - expect to find the first element in the selected_class
    # - expect class_overriden to be False
    top_predictions = ["beluga", "blue_whale", "common_dolphin"]
    obs.set_top_predictions(top_predictions)
    obs.set_selected_class(top_predictions[0])

    assert len(obs.top_predictions) == 3
    assert obs.top_predictions == top_predictions
    assert obs.selected_class == "beluga"
    # the expectation stated above, now actually checked
    assert obs.class_overriden == False


def test_input_observation__set_selected_class_override(good_input_observation):
    """Selecting a class outside the predictions marks it as overridden."""
    obs = good_input_observation

    # before setting, expect empty list
    assert obs.top_predictions == []
    assert obs.selected_class is None
    assert obs.class_overriden == False

    # set >0, and then set_selected_class to something out of list
    # - expect to find the same list in the property/attribute
    # - expect to find the chosen (out-of-list) class in the selected_class
    # - expect class_overriden to be True
    top_predictions = ["beluga", "blue_whale", "common_dolphin"]
    obs.set_top_predictions(top_predictions)
    obs.set_selected_class("brydes_whale")

    assert len(obs.top_predictions) == 3
    assert obs.top_predictions == top_predictions
    assert obs.selected_class == "brydes_whale"
    assert obs.class_overriden == True


# now we want to test to_dict, make sure it is compliant with the data to be
# transmitted to the dataset/server
def test_input_observation_to_dict(good_datadict_for_input_observation):
    """``to_dict`` returns the input data in its transmitted form."""
    obs = InputObservation(**good_datadict_for_input_observation)

    # set predictions, then select something out of the list, so that
    # - top_prediction is the first element of the list
    # - selected_class is the out-of-list choice
    # - class_overriden is True
    top_predictions = ["beluga", "blue_whale", "common_dolphin"]
    selected = "brydes_whale"
    obs.set_top_predictions(top_predictions)
    obs.set_selected_class(selected)

    # as a first point, we expect the dict to be like the input dict...
    expected_output = good_datadict_for_input_observation.copy()
    # ... with a few changes
    # - date and time get converted to str(date) str(time)
    expected_output["date"] = str(expected_output["date"])
    expected_output["time"] = str(expected_output["time"])
    # - image_filename comes from uploaded_file.name
    expected_output["image_filename"] = expected_output["uploaded_file"].name
    # - uploaded_file and image are not in the transmitted data
    del expected_output["uploaded_file"]
    del expected_output["image"]
    # - the classification results should be as set above
    expected_output["top_prediction"] = top_predictions[0]
    expected_output["selected_class"] = selected
    expected_output["class_overriden"] = True

    assert obs.to_dict() == expected_output

    # Illustrative sample of the dict, kept from the original notes:
    # expected = {
    #     'image_filename': 'test_image.jpg',
    #     'image_md5': 'd1d2515e6f6ac4c5ca6dd739d5143cd4',
    #     'latitude': 12.34, 'longitude': 56.78,
    #     'author_email': 'test@example.com',
    #     'image_datetime_raw': '2023-10-10 10:10:10',
    #     'date': '2023-10-10', 'time': '10:10:10',
    #     'selected_class': 'brydes_whale', 'top_prediction': 'beluga',
    #     'class_overriden': True
    # }