Update app.py
Browse files
app.py
CHANGED
@@ -107,35 +107,6 @@ def find_matching_words(sentence1, sentence2):
|
|
107 |
|
108 |
|
109 |
|
110 |
-
matching_bigrams_list = []
|
111 |
-
combined_words_list = []
|
112 |
-
|
113 |
-
for paraphrase in paraphrases:
|
114 |
-
# Find matching words
|
115 |
-
matching_words = find_matching_words(main_sentence, paraphrase)
|
116 |
-
matching_bigrams_list.append(matching_words)
|
117 |
-
|
118 |
-
def combine_matching_bigrams(matching_bigrams):
|
119 |
-
combined_words = []
|
120 |
-
combined_word = ""
|
121 |
-
|
122 |
-
for i, bigram in enumerate(matching_bigrams):
|
123 |
-
if i == 0:
|
124 |
-
combined_word += ' '.join(bigram)
|
125 |
-
elif bigram[0] == matching_bigrams[i-1][1]:
|
126 |
-
combined_word += ' ' + bigram[1]
|
127 |
-
else:
|
128 |
-
combined_words.append(combined_word)
|
129 |
-
combined_word = ' '.join(bigram)
|
130 |
-
|
131 |
-
# Append the last combined word
|
132 |
-
combined_words.append(combined_word)
|
133 |
-
|
134 |
-
return combined_words
|
135 |
-
|
136 |
-
# Combine matching bigrams into single words
|
137 |
-
combined_words = combine_matching_bigrams(matching_words)
|
138 |
-
combined_words_list.append(combined_words)
|
139 |
|
140 |
def remove_overlapping(input_set):
|
141 |
sorted_set = sorted(input_set, key=len, reverse=True)
|
@@ -162,21 +133,6 @@ def find_longest_match(string1, string2):
|
|
162 |
|
163 |
return longest_match
|
164 |
|
165 |
-
common_substrings = set()
|
166 |
-
highlighted_text = []
|
167 |
-
|
168 |
-
for i in combined_words_list[0]:
|
169 |
-
for j in combined_words_list[1]:
|
170 |
-
for k in combined_words_list[2]:
|
171 |
-
for l in combined_words_list[3]:
|
172 |
-
for m in combined_words_list[4]:
|
173 |
-
matching_portion = find_longest_match(i, j)
|
174 |
-
matching_portion = find_longest_match(matching_portion, k)
|
175 |
-
matching_portion = find_longest_match(matching_portion, l)
|
176 |
-
matching_portion = find_longest_match(matching_portion, m)
|
177 |
-
if matching_portion:
|
178 |
-
common_substrings.add(matching_portion)
|
179 |
-
|
180 |
|
181 |
|
182 |
|
@@ -203,17 +159,66 @@ else:
|
|
203 |
|
204 |
main_sentence = check
|
205 |
|
|
|
206 |
st.markdown("**Main Sentence**:")
|
207 |
st.write(main_sentence)
|
208 |
|
209 |
# Generate paraphrases
|
210 |
paraphrases = paraphrase(main_sentence)
|
211 |
|
212 |
-
|
213 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
|
215 |
-
color_palette = ["#FF0000", "#008000", "#0000FF", "#FF00FF", "#00FFFF"]
|
216 |
-
highlighted_sentences = []
|
217 |
|
218 |
|
219 |
highlighted_sentence = main_sentence
|
|
|
107 |
|
108 |
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
def remove_overlapping(input_set):
|
112 |
sorted_set = sorted(input_set, key=len, reverse=True)
|
|
|
133 |
|
134 |
return longest_match
|
135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
|
137 |
|
138 |
|
|
|
159 |
|
160 |
main_sentence = check
|
161 |
|
162 |
+
|
163 |
st.markdown("**Main Sentence**:")
|
164 |
st.write(main_sentence)
|
165 |
|
166 |
# Generate paraphrases
|
167 |
paraphrases = paraphrase(main_sentence)
|
168 |
|
169 |
+
matching_bigrams_list = []
|
170 |
+
combined_words_list = []
|
171 |
+
|
172 |
+
for paraphrase in paraphrases:
|
173 |
+
# Find matching words
|
174 |
+
matching_words = find_matching_words(main_sentence, paraphrase)
|
175 |
+
matching_bigrams_list.append(matching_words)
|
176 |
+
|
177 |
+
def combine_matching_bigrams(matching_bigrams):
    """Merge runs of chained bigrams into space-separated phrases.

    Two consecutive bigrams are "chained" when the first element of the
    later one equals the second element of the one before it. Each maximal
    chain is collapsed into a single string, e.g.
    ``[("the", "quick"), ("quick", "brown"), ("lazy", "dog")]`` becomes
    ``["the quick brown", "lazy dog"]``.

    Args:
        matching_bigrams: Iterable of bigrams, each an indexable pair of
            strings (element 0 and element 1 are read).

    Returns:
        A list of phrases, one per chain, in input order. An empty input
        yields an empty list (previously it yielded ``[""]``).
    """
    phrases = []
    current_words = []  # words of the chain currently being built
    previous = None     # the bigram seen on the previous iteration

    for bigram in matching_bigrams:
        if previous is not None and bigram[0] == previous[1]:
            # Chain continues: only the second word is new.
            current_words.append(bigram[1])
        else:
            # Chain broken (or first bigram): flush what we have, start anew.
            if current_words:
                phrases.append(' '.join(current_words))
            current_words = list(bigram)
        previous = bigram

    # Flush the last chain; skipped when the input was empty so that no
    # spurious empty phrase is reported.
    if current_words:
        phrases.append(' '.join(current_words))

    return phrases
|
194 |
+
|
195 |
+
# Combine matching bigrams into single words
|
196 |
+
combined_words = combine_matching_bigrams(matching_words)
|
197 |
+
combined_words_list.append(combined_words)
|
198 |
+
|
199 |
+
common_substrings = set()
|
200 |
+
highlighted_text = []
|
201 |
+
|
202 |
+
for i in combined_words_list[0]:
|
203 |
+
for j in combined_words_list[1]:
|
204 |
+
for k in combined_words_list[2]:
|
205 |
+
for l in combined_words_list[3]:
|
206 |
+
for m in combined_words_list[4]:
|
207 |
+
matching_portion = find_longest_match(i, j)
|
208 |
+
matching_portion = find_longest_match(matching_portion, k)
|
209 |
+
matching_portion = find_longest_match(matching_portion, l)
|
210 |
+
matching_portion = find_longest_match(matching_portion, m)
|
211 |
+
if matching_portion:
|
212 |
+
common_substrings.add(matching_portion)
|
213 |
+
|
214 |
+
|
215 |
+
|
216 |
+
|
217 |
+
# # Extracting longest common sequences
|
218 |
+
# longest_common_sequences = find_longest_common_sequences(main_sentence, paraphrases)
|
219 |
|
220 |
+
# color_palette = ["#FF0000", "#008000", "#0000FF", "#FF00FF", "#00FFFF"]
|
221 |
+
# highlighted_sentences = []
|
222 |
|
223 |
|
224 |
highlighted_sentence = main_sentence
|