File size: 2,342 Bytes
e41b03f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import difflib as dl
import re

# The palette module is tried under the package-qualified name first, then as a
# top-level module (presumably depending on how the project is launched).
# Only ImportError triggers the fallback, so unrelated failures raised while
# importing `src.parameters` are not silently masked.
try:
    from src.parameters import color
except ImportError:
    from parameters import color


def strike(text):
    '''
    Renders the given text as struck through

    Parameters
    ----------
    text : str
        String to strike through

    Returns
    -------
    content : str
        The input with U+0336 (combining long stroke overlay) appended
        after every character
    '''
    overlay = '\u0336'
    return ''.join(char + overlay for char in text)


def strikethrough_diff(original_license_text, modified_license_text):
    '''
    Builds a word-level HTML diff of two texts

    Both texts are split on single spaces and compared with ``difflib.ndiff``.
    Words only present in the original text are struck through and wrapped in
    a ``<mark>`` whose background is ``color.RED``; words only present in the
    modified text are wrapped in a ``<mark>`` whose background is
    ``color.GREEN``; words common to both are emitted unchanged.

    Parameters
    ----------
    original_license_text : str
        The text to compare it to. This is usually the official license text

    modified_license_text : str
        The text that is being compared with. This is usually the modified license text

    Returns
    -------
    content : str
        The diffed words joined by single spaces, with paragraph breaks
        rendered as a blank line ("\\n\\n")
    '''
    para_break = "__para_break__"

    # Blank lines are swapped for a sentinel word so paragraph breaks survive
    # the split on spaces and take part in the diff like any other token.
    original_license_tokens = (
        original_license_text.replace("\n\n", f" {para_break} ").strip().split(" ")
    )
    modified_license_tokens = (
        modified_license_text.replace("\n\n", f" {para_break} ").strip().split(" ")
    )

    processed_license_word_list = []

    for diff in dl.ndiff(original_license_tokens, modified_license_tokens):
        # Every ndiff line is a two-character tag ("- ", "+ ", "  " or "? ")
        # followed by the token. Slice the tag off rather than str.strip("- "),
        # which would also eat '-' / '+' / ' ' characters belonging to the word.
        tag, token = diff[:2], diff[2:]

        if tag == "? ":
            # Intraline hint lines produced by ndiff; not part of either text.
            continue

        if diff.strip().endswith(para_break):
            processed_license_word_list.append("\n\n")
        elif not token:
            # Empty tokens come from consecutive spaces; keep them unmarked so
            # the original spacing is reproduced by the final join.
            processed_license_word_list.append("")
        elif tag == "- ":
            processed_license_word_list.append(
                f"""<mark style="color: {color.BLACK}; background-color:{color.RED}">{strike(token)}</mark>"""
            )
        elif tag == "+ ":
            processed_license_word_list.append(
                f"""<mark style="color: {color.BLACK}; background-color:{color.GREEN}">{token}</mark>"""
            )
        else:
            processed_license_word_list.append(token)

    return " ".join(processed_license_word_list)