File size: 4,170 Bytes
a03c9b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import os
import tempfile
import unittest
from io import BytesIO
from unittest.mock import patch

from utils.utils import download_and_extract
from utils.utils import get_checksum
from utils.utils import merge_file_lists
from utils.utils import reindex_file_list_keys
from utils.utils import remove_ids_from_file_list
from utils.utils import deduplicate_splits


class TestMergeFileListFunctions(unittest.TestCase):
    """Tests for the file-list helpers: merging, re-keying and ID filtering."""

    def test_merge_file_lists(self):
        """Three two-entry lists merge into six entries readable at 0..5."""
        first = {1: 'file1.txt', 2: 'file2.txt'}
        second = {3: 'file3.txt', 4: 'file4.txt'}
        third = {5: 'file5.txt', 6: 'file6.txt'}

        merged = merge_file_lists([first, second, third])

        self.assertEqual(len(merged), 6)

        # The inputs are keyed 1..6; the merged result is expected to expose
        # the same values under keys 0..5, in input order.
        expected_in_order = [
            'file1.txt',
            'file2.txt',
            'file3.txt',
            'file4.txt',
            'file5.txt',
            'file6.txt',
        ]
        for position, expected_name in enumerate(expected_in_order):
            self.assertEqual(merged[position], expected_name)

    def test_reindex_file_list_keys(self):
        """String keys 'a' and 'b' are replaced by the integers 0 and 1."""
        source = {
            'a': {'id': 1, 'name': 'file1'},
            'b': {'id': 2, 'name': 'file2'},
        }
        # Expected entries are separate literals (not the input objects) so a
        # helper that mutates its input cannot make the comparison pass.
        wanted = {
            0: {'id': 1, 'name': 'file1'},
            1: {'id': 2, 'name': 'file2'},
        }

        result = reindex_file_list_keys(source)

        self.assertEqual(result, wanted)

    def test_remove_ids_from_file_list(self):
        """The entry whose music_id is listed is dropped; the rest is re-keyed."""
        source = {
            'a': {'music_id': 123, 'name': 'file1'},
            'b': {'music_id': 222, 'name': 'file2'},
        }
        ids_to_drop = [123]
        wanted = {0: {'music_id': 222, 'name': 'file2'}}

        result = remove_ids_from_file_list(source, ids_to_drop, reindex=True)

        self.assertEqual(result, wanted)


class TestGetChecksum(unittest.TestCase):
    """Tests for ``get_checksum``."""

    def test_get_checksum_z(self):
        """The checksum of a small known file matches the hard-coded digest."""
        # Known digest for the content written below (the original test
        # describes it as an MD5 value).
        expected_checksum = "0b26e313ed4a7ca6904b0e9369e5b957"

        # Work inside a throwaway directory: the file is removed even when
        # get_checksum raises, and no file in the current working directory
        # is ever overwritten or deleted by the test.
        with tempfile.TemporaryDirectory() as tmp_dir:
            file_name = os.path.join(tmp_dir, "temp_test_file.txt")
            with open(file_name, "w", encoding="utf-8") as f:
                f.write("This is a test file")

            # Call the get_checksum function
            calculated_checksum = get_checksum(file_name)

        # Compare the expected and calculated checksums
        self.assertEqual(expected_checksum, calculated_checksum)


class TestDeduplicateSplits(unittest.TestCase):
    """Tests for ``deduplicate_splits``."""

    def test_deduplicate_splits(self):
        """Entries of split A whose some_id also occurs in split B are removed."""

        def entry(some_id, file_name):
            # Key order ('some_id' first, then 'file_name') mirrors the
            # fixture layout used throughout this test.
            return {'some_id': some_id, 'file_name': file_name}

        # Split A holds ids 1, 2, 3; split B holds ids 2, 3, 6.
        split_a = {
            'split1': entry(1, 'a.jpg'),
            'split2': entry(2, 'b.jpg'),
            'split3': entry(3, 'c.jpg'),
        }
        split_b = {
            'split4': entry(2, 'd.jpg'),
            'split5': entry(3, 'e.jpg'),
            'split6': entry(6, 'f.jpg'),
        }

        remaining_a = deduplicate_splits(split_a, split_b, reindex=False)

        # Only id 1 is unique to split A; with reindex=False it is expected
        # to keep its original key.
        wanted_a = {'split1': entry(1, 'a.jpg')}
        self.assertDictEqual(remaining_a, wanted_a)


# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()