aiisc-watermarking-modelv3

Sleeping

App Files Files Community

jgyasu committed on Jul 8, 2024

Commit

7baf701

verified ·

1 Parent(s): aa0ee62

Upload 2 files

Browse files

Files changed (2) hide show

highlighter.py +39 -0
lcs.py +35 -0

highlighter.py ADDED Viewed

	@@ -0,0 +1,39 @@

+import re
def highlight_common_words(common_words, sentences):
    """Render numbered sentences as HTML with the common phrases highlighted.

    Args:
        common_words: iterable of ``(index, phrase)`` pairs. ``index`` is the
            number shown in the black badge in front of every highlighted
            occurrence; ``phrase`` is matched case-insensitively on word
            boundaries. Earlier pairs take priority when matches overlap.
        sentences: iterable of sentences. They are numbered from 1 and joined
            with ``<br><br>`` in the output.

    Returns:
        An HTML fragment (``str``): a bordered card titled
        "Highlighted Sentences" containing the numbered sentences.

    Matching is done against the raw sentence text only, never against markup
    that has already been generated and never against the ``"N. "`` prefix.
    This keeps a phrase such as "black" or "color" from matching inside the
    ``style`` attributes of previously inserted spans. Text outside and inside
    the highlights is HTML-escaped so sentence content cannot inject markup.
    """
    # Function-scope import: the module itself only imports ``re``.
    from html import escape

    # Compile every phrase once and give each distinct phrase a stable colour,
    # stepping the hue by 60 degrees in order of first appearance.
    color_map = {}
    matchers = []
    for index, word in common_words:
        if not word:
            # An empty phrase would become r'\b\b' and match at every boundary.
            continue
        if word not in color_map:
            color_map[word] = f'hsl({len(color_map) * 60 % 360}, 70%, 80%)'
        pattern = re.compile(rf'\b{re.escape(word)}\b', flags=re.IGNORECASE)
        matchers.append((index, color_map[word], pattern))

    highlighted_html = []
    for number, sentence in enumerate(sentences, start=1):
        text = str(sentence)

        # Claim non-overlapping character ranges in priority order, so an
        # earlier phrase wins over a later one that touches the same text.
        claimed = []
        for index, color, pattern in matchers:
            for match in pattern.finditer(text):
                start, end = match.span()
                if all(end <= s or start >= e for s, e, _, _ in claimed):
                    claimed.append((start, end, index, color))
        claimed.sort()

        # Single left-to-right pass: escaped plain text between wrapped hits.
        parts = [f"{number}. "]
        cursor = 0
        for start, end, index, color in claimed:
            # quote=False: this is element text, only &, < and > need escaping.
            parts.append(escape(text[cursor:start], quote=False))
            parts.append(
                f'<span style="background-color: {color}; font-weight: bold;'
                f' padding: 2px 4px; border-radius: 2px; position: relative;">'
                f'<span style="background-color: black; color: white; border-radius: 50%;'
                f' padding: 2px 5px; margin-right: 5px;">{index}</span>'
                f'{escape(text[start:end], quote=False)}'
                f'</span>'
            )
            cursor = end
        parts.append(escape(text[cursor:], quote=False))
        highlighted_html.append("".join(parts))

    final_html = "<br><br>".join(highlighted_html)
    # NOTE(review): ``border: solid 1px #;`` has an empty colour value; it is
    # kept unchanged here because the intended colour is not visible. Confirm
    # and fill in the hex value.
    return f'''
    <div style="border: solid 1px #; padding: 16px; background-color: #FFFFFF; color: #374151; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 2px;">
    <h3 style="margin-top: 0; font-size: 1em; color: #111827;">Highlighted Sentences</h3>
    <div style="background-color: #F5F5F5; line-height: 1.6; padding: 15px; border-radius: 2px;">{final_html}</div>
    </div>
    '''

lcs.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import re
+from nltk.corpus import stopwords
def find_common_subsequences(sentence, str_list):
    """Find word n-grams of ``sentence`` that occur in every string of ``str_list``.

    All inputs are lower-cased, stripped of punctuation (anything that is not
    a word character or whitespace) and of NLTK English stop words before
    comparison. N-grams are tried from 5 words down to 1, left to right, and
    an n-gram is skipped when it already occurs inside a previously accepted
    (longer or equal) phrase.

    Args:
        sentence: the reference sentence whose n-grams are tested.
        str_list: strings that must all contain an n-gram for it to count.
            If it is empty, every n-gram trivially qualifies.

    Returns:
        A list of ``(index, phrase)`` tuples; ``index`` starts at 1 and
        increases in the order the phrases were accepted.

    Containment is checked on whole words: "art" is not considered present in
    "start". A plain substring test would report such fragments as common
    phrases even though the word never appears in the other strings.
    """
    stop_words = set(stopwords.words('english'))

    def normalize(text):
        # Result is a single-space-joined word sequence, which is what makes
        # the space-padding trick below a valid whole-word test.
        text = re.sub(r'[^\w\s]', '', text.lower())
        return " ".join(word for word in text.split() if word not in stop_words)

    words = normalize(sentence).split()
    # Pad once so " phrase " in " text " matches only on word boundaries.
    padded_targets = [f" {normalize(s)} " for s in str_list]

    common_grams = []
    accepted_padded = []
    index = 1
    for n in range(5, 0, -1):
        for i in range(len(words) - n + 1):
            subseq = " ".join(words[i:i + n])
            needle = f" {subseq} "
            if not all(needle in target for target in padded_targets):
                continue
            # Already covered by (or identical to) an accepted phrase.
            if any(needle in phrase for phrase in accepted_padded):
                continue
            common_grams.append((index, subseq))
            accepted_padded.append(needle)
            index += 1
    return common_grams