jgyasu committed on
Commit
7baf701
·
verified ·
1 Parent(s): aa0ee62

Upload 2 files

Browse files
Files changed (2) hide show
  1. highlighter.py +39 -0
  2. lcs.py +35 -0
highlighter.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
def highlight_common_words(common_words, sentences):
    """Render numbered sentences as HTML with the common words highlighted.

    Args:
        common_words: Iterable of ``(index, word)`` pairs. ``index`` is shown
            in a small badge in front of every highlighted occurrence of
            ``word``. Earlier pairs take priority when occurrences overlap.
        sentences: Iterable of sentences. They are numbered from 1 in the
            output and joined with ``<br><br>``.

    Returns:
        An HTML fragment (``str``): a styled card whose body contains the
        numbered, highlighted sentences.

    Matching is case-insensitive and anchored on word boundaries (``\\b``).
    Each distinct word gets its own background colour; hues advance in steps
    of 60 degrees and therefore repeat after six distinct words.
    """
    import html  # local import: only this function needs it

    # Assign colours and compile the patterns once, in the order given, rather
    # than once per sentence.
    color_map = {}
    matchers = []
    for index, word in common_words:
        if not word:
            # An empty word would compile to r'\b\b' and match at every word
            # boundary, so it is ignored.
            continue
        if word not in color_map:
            color_map[word] = f'hsl({len(color_map) * 60 % 360}, 70%, 80%)'
        pattern = re.compile(rf'\b{re.escape(word)}\b', re.IGNORECASE)
        matchers.append((index, color_map[word], pattern))

    highlighted_html = []
    for number, sentence in enumerate(sentences, start=1):
        text = str(sentence)

        # Find every occurrence on the *raw* sentence. Substituting word by
        # word into text that already contains markup would let later words
        # (e.g. "black", "color", "2") match inside the inserted <span> tags
        # and corrupt the HTML. A span is only claimed if it does not overlap
        # one claimed by an earlier (higher-priority) word.
        claimed = []
        for index, color, pattern in matchers:
            for match in pattern.finditer(text):
                start, end = match.span()
                if all(end <= s or start >= e for s, e, _, _ in claimed):
                    claimed.append((start, end, index, color))
        claimed.sort(key=lambda span: span[0])

        # The "N. " prefix is emitted separately so the list number itself is
        # never highlighted.
        pieces = [f"{number}. "]
        cursor = 0
        for start, end, index, color in claimed:
            # Plain text is escaped so that '<', '>' and '&' in a sentence
            # cannot break or inject markup.
            pieces.append(html.escape(text[cursor:start], quote=False))
            badge = html.escape(str(index), quote=False)
            matched = html.escape(text[start:end], quote=False)
            pieces.append(
                f'<span style="background-color: {color}; font-weight: bold;'
                f' padding: 2px 4px; border-radius: 2px; position: relative;">'
                f'<span style="background-color: black; color: white; border-radius: 50%;'
                f' padding: 2px 5px; margin-right: 5px;">{badge}</span>'
                f'{matched}'
                f'</span>'
            )
            cursor = end
        pieces.append(html.escape(text[cursor:], quote=False))
        highlighted_html.append("".join(pieces))

    final_html = "<br><br>".join(highlighted_html)
    # NOTE(review): "border: solid 1px #;" has no colour value after '#';
    # kept exactly as it was -- confirm the intended border colour.
    return f'''
    <div style="border: solid 1px #; padding: 16px; background-color: #FFFFFF; color: #374151; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 2px;">
        <h3 style="margin-top: 0; font-size: 1em; color: #111827;">Highlighted Sentences</h3>
        <div style="background-color: #F5F5F5; line-height: 1.6; padding: 15px; border-radius: 2px;">{final_html}</div>
    </div>
    '''
lcs.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from nltk.corpus import stopwords
3
+
4
def find_common_subsequences(sentence, str_list, max_ngram=5):
    """Find word n-grams of ``sentence`` that occur in every string of ``str_list``.

    All inputs are lower-cased, stripped of every character that is neither a
    word character nor whitespace, and filtered against the English stop-word
    list returned by ``stopwords.words('english')`` before comparison.

    Args:
        sentence: The reference sentence whose n-grams are examined.
        str_list: Strings that must all contain an n-gram for it to count as
            common.
        max_ngram: Longest n-gram (in words) to consider. Defaults to 5.

    Returns:
        A list of ``(index, phrase)`` tuples. ``index`` counts from 1 in
        discovery order; longer phrases are discovered first, and a phrase
        that is already part of a previously accepted phrase is skipped.
    """
    stop_words = set(stopwords.words('english'))

    def normalize(text):
        # Lower-case, drop punctuation, then drop stop words; returns tokens.
        cleaned = re.sub(r'[^\w\s]', '', text.lower())
        return [token for token in cleaned.split() if token not in stop_words]

    words = normalize(sentence)

    # Every normalized string is padded with a space on both sides so that a
    # plain substring test only succeeds on whole-token boundaries. Without
    # the padding "cat" would count as present in "concatenate".
    targets = [" " + " ".join(normalize(s)) + " " for s in str_list]

    common_grams = []
    accepted = []  # padded phrases already reported
    index = 1

    # Longest n-grams first, so shorter ones contained in them can be skipped.
    for n in range(max_ngram, 0, -1):
        for i in range(len(words) - n + 1):
            padded = " " + " ".join(words[i:i + n]) + " "
            if any(padded in phrase for phrase in accepted):
                continue
            if all(padded in target for target in targets):
                common_grams.append((index, padded.strip()))
                accepted.append(padded)
                index += 1

    return common_grams