Upload 2 files
Browse files- highlighter.py +39 -0
- lcs.py +35 -0
highlighter.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
def highlight_common_words(common_words, sentences):
    """Render numbered sentences as HTML with the common words highlighted.

    Every whole-word, case-insensitive occurrence of a common word or phrase
    is wrapped in a coloured ``<span>`` that also shows the phrase's badge
    number. Colours are assigned per distinct word in first-seen order,
    stepping the hue by 60 degrees and wrapping at 360.

    All words are matched in a single pass over the raw sentence text. The
    markup inserted for one word is therefore never re-scanned for another
    (a common word such as "span" or "style" cannot corrupt tags emitted
    earlier), and the ``"<n>. "`` numbering prefix is never highlighted.
    Text taken from the sentences is HTML-escaped before being embedded.

    Args:
        common_words: Iterable of ``(badge, word)`` pairs. ``badge`` is shown
            in the small black marker; ``word`` is the word or phrase to
            highlight. Empty words are ignored for matching.
        sentences: Iterable of sentence strings; numbered from 1 in output.

    Returns:
        An HTML fragment (``str``) containing a titled card with the
        highlighted sentences separated by ``<br><br>``.
    """
    # Function-scope import so the block does not depend on file-level edits.
    from html import escape

    # Materialise once: the pairs are needed for colours and for the pattern.
    entries = [(badge, word) for badge, word in common_words]

    color_map = {}
    for _, word in entries:
        if word not in color_map:
            color_map[word] = f'hsl({len(color_map) * 60 % 360}, 70%, 80%)'

    # Longest phrases first so that, at a given position, a phrase wins over
    # any shorter word it starts with. sorted() is stable, so ties keep the
    # caller's order.
    candidates = sorted(
        (entry for entry in entries if entry[1]),
        key=lambda entry: len(entry[1]),
        reverse=True,
    )

    pattern = None
    if candidates:
        # One capturing group per candidate: match.lastindex then identifies
        # which candidate matched, independent of the matched text's case.
        pattern = re.compile(
            "|".join(rf'(\b{re.escape(word)}\b)' for _, word in candidates),
            flags=re.IGNORECASE,
        )

    def render(match):
        """Return the highlight markup for a single regex match."""
        badge, word = candidates[match.lastindex - 1]
        return (
            f'<span style="background-color: {color_map[word]}; font-weight: bold;'
            f' padding: 2px 4px; border-radius: 2px; position: relative;">'
            f'<span style="background-color: black; color: white; border-radius: 50%;'
            f' padding: 2px 5px; margin-right: 5px;">{escape(str(badge), quote=False)}</span>'
            f'{escape(match.group(0), quote=False)}'
            f'</span>'
        )

    def highlight(text):
        """Escape ``text`` and wrap each common-word occurrence in markup."""
        if pattern is None:
            return escape(text, quote=False)
        pieces = []
        cursor = 0
        for match in pattern.finditer(text):
            pieces.append(escape(text[cursor:match.start()], quote=False))
            pieces.append(render(match))
            cursor = match.end()
        pieces.append(escape(text[cursor:], quote=False))
        return "".join(pieces)

    highlighted_html = [
        f"{idx}. {highlight(sentence)}"
        for idx, sentence in enumerate(sentences, start=1)
    ]
    final_html = "<br><br>".join(highlighted_html)

    # NOTE(review): "border: solid 1px #;" has an empty colour value, so the
    # declaration is invalid CSS; kept as-is because the intended colour is
    # not visible here - confirm and fill in.
    return f'''
    <div style="border: solid 1px #; padding: 16px; background-color: #FFFFFF; color: #374151; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 2px;">
    <h3 style="margin-top: 0; font-size: 1em; color: #111827;">Highlighted Sentences</h3>
    <div style="background-color: #F5F5F5; line-height: 1.6; padding: 15px; border-radius: 2px;">{final_html}</div>
    </div>
    '''
|
lcs.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
from nltk.corpus import stopwords
|
3 |
+
|
4 |
+
def find_common_subsequences(sentence, str_list):
    """Find word n-grams of ``sentence`` that occur in every string of ``str_list``.

    All inputs are lower-cased, stripped of characters that are neither word
    characters nor whitespace, and filtered against the NLTK English stop-word
    list. N-grams of 5 down to 1 words are then taken from ``sentence`` left
    to right; an n-gram is kept when it appears as a contiguous run of whole
    words in every cleaned string and is not already contained, again on word
    boundaries, in a previously kept phrase.

    Matching is done on whole words rather than raw characters, so "cat" is
    not considered present in "concatenate" and is not suppressed by an
    earlier phrase such as "concatenate strings".

    Args:
        sentence: The reference sentence whose n-grams are tested.
        str_list: Strings that must all contain an n-gram for it to count as
            common. With an empty list every n-gram is trivially common.

    Returns:
        A list of ``(index, phrase)`` tuples; ``index`` starts at 1 and
        increases in the order phrases were found (longest n-grams first).
    """
    stop_words = set(stopwords.words('english'))

    def clean_words(text):
        """Lower-case ``text``, drop punctuation and stop words; return words."""
        text = re.sub(r'[^\w\s]', '', text.lower())
        return [word for word in text.split() if word not in stop_words]

    def padded(words_):
        """Join words with single spaces and pad both ends with a space.

        With this padding a plain ``in`` test on two padded strings can only
        succeed on whole-word boundaries.
        """
        return f" {' '.join(words_)} "

    words = clean_words(sentence)
    targets = [padded(clean_words(s)) for s in str_list]

    common_grams = []
    accepted = []  # padded forms of the phrases already returned
    index = 1

    for n in range(5, 0, -1):
        for i in range(len(words) - n + 1):
            gram = words[i:i + n]
            needle = padded(gram)
            if not all(needle in target for target in targets):
                continue
            # Skip n-grams already covered by a longer (or identical) phrase.
            if any(needle in phrase for phrase in accepted):
                continue
            common_grams.append((index, " ".join(gram)))
            accepted.append(needle)
            index += 1

    return common_grams
|