File size: 3,788 Bytes
e8aad19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
from typing import List, Dict

class CustomSubsetsLabel:
    """Build an HTML page that lists corpus subsets as labelled progress bars.

    The public entry point is :meth:`compute`. It takes a mapping whose keys
    carry a subset name and a frequency and whose values are fractions, and
    returns one HTML string: ``html_head`` + one bar per subset +
    ``html_footer``.
    """

    def __init__(
        self
    ) -> None:
        """Set up the static page wrapper and the subset-name -> URL table."""

        # Opening markup and the CSS used by every bar. It is emitted
        # verbatim at the start of each rendered page.
        self.html_head = """
        <html>
            <head>
                <meta charset="utf-8">
                <meta name="viewport" content="width=device-width, initial-scale=1">
                <style>
                    progress {
                        -webkit-appearance: none;
                    }
                    progress::-webkit-progress-bar {
                        background-color: #666;
                        border-radius: 7px;
                    }
                    progress {
                        width:100%;
                        height:4px;
                        border-radius: 1px;
                    }
                    #myturn {
                        display: block;
                        position: relative;
                        margin: auto;
                        width: 90%;
                        padding: 2px;
                    }
                </style>
            </head>
            <body>
        """

        # Closing markup appended after the last bar.
        self.html_footer = "</body></html>"

        # Subset name -> page the subset label links to.
        self.subset_links = {
            'allwikis': "https://github.com/josecannete/wikiextractorforBERT",
            'DGT': "http://opus.nlpl.eu/DGT.php",
            'DOGC': "http://opus.nlpl.eu/DOGC.php",
            'ECB': "http://opus.nlpl.eu/ECB.php",
            'EMEA': "http://opus.nlpl.eu/EMEA.php",
            'EUBookShop': "http://opus.nlpl.eu/EUbookshop.php",
            'Europarl': "http://opus.nlpl.eu/Europarl.php",
            'GlobalVoices': "http://opus.nlpl.eu/GlobalVoices.php",
            'JRC': "http://opus.nlpl.eu/JRC-Acquis.php",
            'multiUN': "http://opus.nlpl.eu/MultiUN.php",
            'NewsCommentary11': "http://opus.nlpl.eu/News-Commentary-v11.php",
            'OpenSubtitles2018': "http://opus.nlpl.eu/OpenSubtitles-v2018.php",
            'ParaCrawl': "http://opus.nlpl.eu/ParaCrawl.php",
            'TED': "http://opus.nlpl.eu/TED2013.php",
            'UN': "http://opus.nlpl.eu/UN.php",
        }

    def __progressbar(
        self,
        percentage: float,
        subset: str,
        freq: int,
        size: int = 15
    ) -> str:
        """Return the HTML for a single subset bar.

        Args:
            percentage: Value shown next to the bar; its integer part is
                used as the ``<progress>`` value (out of 100).
            subset: Subset name shown in bold. When it is a key of
                ``self.subset_links`` the label is a link to that URL.
            freq: Frequency printed after the subset name.
            size: Font size in pixels of the label; the frequency text is
                rendered two pixels smaller.

        Returns:
            The ``<div>`` markup for the bar.
        """
        link = self.subset_links.get(subset)
        if link is None:
            # Unknown subset: there is no URL to point at, so show the same
            # label without a link instead of failing with a KeyError.
            label_open, label_close = "<span>", "</span>"
        else:
            label_open = f'<a href="{link}" target="_blank">'
            label_close = "</a>"

        # NOTE(review): every bar reuses id="myturn", so the id is repeated
        # when more than one subset is rendered. Left as is because the CSS
        # in html_head targets that id.
        return f"""
        <div id="myturn">
            <progress value="{int(percentage)}" max="100"></progress>
            <p style="text-align:left; font-size:{size}px; padding:0px;">
                {label_open}
                    <strong>{subset}</strong> <span style="font-size:{size-2}px">(Frecuencia: {freq})</span>
                {label_close}
                <span style="float:right;">
                    <strong>{percentage}%</strong>
                </span>
            </p>
        </div>
        """

    def __render(
        self,
        subsets: List[str],
        freqs: List[int],
        percentages: List[float]
    ) -> str:
        """Return the full page: head, one bar per subset, footer.

        The three lists are consumed in lockstep with ``zip``, so rendering
        stops at the shortest one.
        """
        bars = "".join(
            self.__progressbar(percentage=perc, subset=subset, freq=freq)
            for subset, freq, perc in zip(subsets, freqs, percentages)
        )
        return self.html_head + bars + self.html_footer

    def compute(
        self,
        subsets_dic: Dict[str, float]
    ) -> str:
        """Render ``subsets_dic`` as an HTML page of progress bars.

        Args:
            subsets_dic: Mapping from a whitespace-separated key to a
                fraction. The first token of the key is the subset name.
                The second token, with its first and last characters
                removed, is parsed as the integer frequency (presumably a
                key looks like ``"DGT (10)"`` -- confirm against the
                caller). The value is multiplied by 100 and rounded to two
                decimals to obtain the displayed percentage.

        Returns:
            The HTML page as a string. An empty mapping yields just the
            head followed by the footer.

        Raises:
            IndexError: If a key has fewer than two whitespace-separated
                tokens.
            ValueError: If the stripped second token is not an integer.
        """
        # Keyed by subset name: if several keys share a name, the last one
        # wins while the position of the first is kept.
        subsets_info = {}
        for key, fraction in subsets_dic.items():
            tokens = key.split()
            name = tokens[0]
            subsets_info[name] = {
                'freq': int(tokens[1][1:-1]),
                'perc': round(fraction * 100, 2),
            }

        subsets = list(subsets_info)
        freqs = [info['freq'] for info in subsets_info.values()]
        percentages = [info['perc'] for info in subsets_info.values()]

        return self.__render(subsets, freqs, percentages)