# File size: 3,937 Bytes
# 96d549d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import argparse
import os

import cv2
from tqdm import tqdm

import imgcomparison as ic
import mediaoutput
import sources
from analyzer import Analyzer
from slides import SlideDataHelper


class SlideSorter(Analyzer):
    """
    Eliminates duplicate slides from a slide source.

    Slides are processed in the order the source delivers them. The first
    occurrence of a slide is kept and receives a page number; every later
    slide that the comparator considers identical is marked as a duplicate
    and its time is added to the slide that was kept.
    """

    def __init__(self, source, outpath=None, timetable_loc=None, file_format=".png",
                 comparator=ic.AbsDiffHistComparator(0.98)):
        """
        Default initializer
        :param source: the source of the slides; it has to support ``len()``
        and provide a ``contents()`` method
        :param outpath: path to output the slides to. If it is None a
        NullWriter is used instead of a TimetableWriter.
        :param timetable_loc: path where the timetable should be written
        (default is 'timetable.txt' inside outpath)
        :param file_format: file format of the output images e.g. '.jpg'
        :param comparator: the comparator to determine, if two slides
        are duplicates. Note that the default instance is created once when
        the class is defined and is therefore shared by every sorter that
        does not pass its own comparator.
        """
        self.comparator = comparator
        self.source = source
        # Always set the attribute so it exists even when nothing is written.
        self.file_format = file_format
        self.writer = mediaoutput.NullWriter()
        if outpath is not None:
            if timetable_loc is None:
                timetable_loc = os.path.join(outpath, 'timetable.txt')
            self.writer = mediaoutput.TimetableWriter(outpath, timetable_loc, self.file_format)

    def sort(self):
        """
        Runs the duplicate elimination while showing a progress bar.
        The unique slides are handed to the writer once all slides of the
        source have been processed (see group_slides).
        :return: the list of slides without duplicates
        """
        slides = []
        reported = 0
        with tqdm(total=len(self.source), desc="Sorting Slides: ") as pbar:
            for i, slide in self.group_slides():
                # tqdm.update() expects an increment, not an absolute
                # position, so only report the slides handled since the
                # previous update.
                processed = i + 1
                if processed > reported:
                    pbar.update(processed - reported)
                    reported = processed
                if slide is not None:
                    slides.append(slide)

        return slides

    def group_slides(self):
        """
        Groups the slides by eliminating duplicates.

        This is a generator. For every slide of the source that is not
        already marked it yields exactly one tuple ``(index, slide)`` where
        ``index`` is the zero-based position of the slide in the source and
        ``slide`` is the slide itself if it is unique or None if it is a
        duplicate of an earlier slide. Slides that arrive already marked are
        only remembered for comparison and yield nothing.

        After the source is exhausted the unique slides are passed to the
        writer and the writer is closed.
        :return: a generator of (index, slide or None) tuples
        """
        seen = []
        sorted_slides = []
        page_counter = 1
        for loop_counter, slide in enumerate(self.source.contents()):
            if slide.marked:
                seen.append(slide)
                continue

            # Stop at the first match: continuing the search would add the
            # time of this slide to the same reference several times.
            duplicate_of = next(
                (other for other in seen if self.comparator.are_same(slide.img, other.img)),
                None)
            seen.append(slide)

            if duplicate_of is None:
                slide.page_number = page_counter
                yield loop_counter, slide
                sorted_slides.append(slide)
                page_counter += 1
                continue

            # A marked slide is itself a duplicate, so the time has to be
            # recorded on the slide it refers to.
            reference = duplicate_of.reference if duplicate_of.marked else duplicate_of
            reference.add_time(slide.time)
            slide.reference = reference
            slide.marked = True
            yield loop_counter, None

        try:
            self.writer.write(sorted_slides)
        finally:
            # Close the writer even if writing the slides failed.
            self.writer.close()

    def analyze(self):
        """
        Yields the unique slides one by one, skipping the duplicates
        reported by group_slides.
        :return: a generator of slides without duplicates
        """
        for _, slide in self.group_slides():
            if slide is not None:
                yield slide


if __name__ == '__main__':
    # Command line entry point: reads the slides from a directory, removes
    # the duplicates and writes the unique slides together with a timetable.
    parser = argparse.ArgumentParser(description="Slide Sorter")
    parser.add_argument("-d", "--inputslides", help="path of the sequentially sorted slides", default="slides/")
    parser.add_argument("-o", "--outpath", help="path to output slides", default="unique/", nargs='?')
    parser.add_argument("-f", "--fileformat", help="file format of the output images e.g. '.jpg'",
                        default=".jpg", nargs='?')
    parser.add_argument("-t", "--timetable",
                        help="path where the timetable should be written (default is the outpath+'timetable.txt')",
                        nargs='?', default=None)
    args = parser.parse_args()
    # Because of nargs='?' a bare "-o" leaves outpath as None. Joining None
    # with a file name raises a TypeError, so the default timetable location
    # is only derived when an output path exists. Without an output path
    # SlideSorter falls back to its NullWriter.
    if args.timetable is None and args.outpath is not None:
        args.timetable = os.path.join(args.outpath, "timetable.txt")

    slide_source = sources.ListSource(SlideDataHelper(args.inputslides).get_slides())
    sorter = SlideSorter(slide_source, args.outpath, args.timetable, args.fileformat)
    sorter.sort()