File size: 3,470 Bytes
96d549d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b16bc4
96d549d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import argparse
import os

import cv2
from tqdm import tqdm

import imgcomparison as ic
import mediaoutput
import sources
from analyzer import Analyzer
from slides import SlideDataHelper


class SlideSorter(Analyzer):
    """
    Removes duplicate slides from a source while keeping the source order.

    A slide whose image the comparator considers equal to an earlier slide
    is treated as a duplicate: its time is added to the slide it duplicates,
    it is marked and it is left out of the result.
    """

    # NOTE: the default comparator is created once, when the class is
    # defined, and is shared by every sorter that does not pass its own.
    def __init__(self, source, comparator=ic.AbsDiffHistComparator(0.98)):
        """
        Default initializer
        :param source: the source of the slides; it has to support ``len()``
        and provide a ``contents()`` method that iterates over the slides
        :param comparator: the comparator to determine, if two slides
        are duplicates (its ``are_same`` method is called with two images).
        """
        self.comparator = comparator
        self.source = source

    def sort(self):
        """
        Collects the unique slides of the source while showing a progress
        bar.
        :return: a list of the slides without duplicates, in source order
        """
        slides = []
        with tqdm(total=len(self.source), desc="Sorting Slides: ") as pbar:
            done = 0
            for i, slide in self.group_slides():
                # tqdm.update() expects an increment, not an absolute
                # position, so only advance by what is new since last time.
                step = i + 1 - done
                if step > 0:
                    pbar.update(step)
                    done = i + 1
                if slide is not None:
                    slides.append(slide)

        return slides

    def group_slides(self):
        """
        Groups the slides by eliminating duplicates.

        This is a generator. For every slide of the source that is not
        already marked it yields exactly one tuple ``(index, slide)``:
        ``index`` is the position of the slide in the source and ``slide``
        is the slide itself if it is new, or ``None`` if it duplicates an
        earlier slide. Slides that are already marked are skipped without
        yielding, but they are still used for comparison with later slides.

        Side effects: a new slide gets a ``page_number`` (starting at 1);
        a duplicate gets ``marked = True`` and a ``reference`` to the slide
        it duplicates, and its time is added to that slide.
        :return: a generator of ``(index, slide or None)`` tuples
        """
        seen = []
        page_counter = 1
        for loop_counter, slide in enumerate(self.source.contents()):
            seen.append(slide)
            if slide.marked:
                continue

            # Stop at the first match: continuing would register the time
            # of this slide once per matching slide on the same reference.
            duplicate_of = None
            for other in seen[:-1]:
                if self.comparator.are_same(slide.img, other.img):
                    # A marked slide is itself a duplicate, so follow it to
                    # the slide it refers to.
                    duplicate_of = other.reference if other.marked else other
                    break

            if duplicate_of is None:
                slide.page_number = page_counter
                page_counter += 1
                yield loop_counter, slide
            else:
                duplicate_of.add_time(slide.time)
                slide.reference = duplicate_of
                slide.marked = True
                yield loop_counter, None

    def analyze(self):
        """
        Yields the unique slides of the source one by one.
        :return: a generator of the slides without duplicates
        """
        for _, slide in self.group_slides():
            if slide is None:
                continue
            yield slide


if __name__ == '__main__':
    # Command line entry point: reads the slides from the input directory
    # and removes the duplicates.
    parser = argparse.ArgumentParser(description="Slide Sorter")
    parser.add_argument("-d", "--inputslides", help="path of the sequentially sorted slides", default="slides/")
    parser.add_argument("-o", "--outpath", help="path to output slides", default="unique/", nargs='?')
    parser.add_argument("-f", "--fileformat", help="file format of the output images e.g. '.jpg'",
                        default=".jpg", nargs='?')
    parser.add_argument("-t", "--timetable",
                        help="path where the timetable should be written (default is the outpath+'timetable.txt')",
                        nargs='?', default=None)
    args = parser.parse_args()
    if args.timetable is None:
        args.timetable = os.path.join(args.outpath, "timetable.txt")

    # SlideSorter.__init__ only accepts (source, comparator). Passing the
    # output path, timetable path and file format as extra positional
    # arguments raised a TypeError before any slide was processed.
    # NOTE(review): outpath, timetable and fileformat are parsed but not
    # used here; writing the slides and the timetable to disk still has to
    # be wired up - the writer API is not visible in this file.
    sorter = SlideSorter(sources.ListSource(SlideDataHelper(args.inputslides).get_slides()))
    sorter.sort()